diff --git a/clang/test/CodeGen/PowerPC/altivec.c b/clang/test/CodeGen/PowerPC/altivec.c index 808135de00ea6b8..6eb955dcdec204a 100644 --- a/clang/test/CodeGen/PowerPC/altivec.c +++ b/clang/test/CodeGen/PowerPC/altivec.c @@ -12,8 +12,8 @@ // RUN: %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 --target=powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE // Check initialization -vector int test0 = (vector int)(1); // CHECK: @test0 ={{.*}} global <4 x i32> -vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> +vector int test0 = (vector int)(1); // CHECK: @test0 ={{.*}} global <4 x i32> splat (i32 1) +vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> splat (float 1.000000e+{{0+}}) // CHECK-BE: @v1 ={{.*}} global <16 x i8> // CHECK-LE: @v1 ={{.*}} global <16 x i8> @@ -32,8 +32,8 @@ void test2(void) { vector int vi; vector float vf; - vi = (vector int)(1); // CHECK: - vf = (vector float)(1.0); // CHECK: + vi = (vector int)(1); // CHECK: splat (i32 1) + vf = (vector float)(1.0); // CHECK: splat (float 1.000000e+{{0+}}) vi = (vector int)(1, 2, 3, 4); // CHECK: vi = (vector int)(1, 2, 3, 4, 5); // CHECK: @@ -46,9 +46,9 @@ void test2(void) // Check pre/post increment/decrement void test3(void) { vector int vi; - vi++; // CHECK: add <4 x i32> {{.*}} + vi++; // CHECK: add <4 x i32> {{.*}} splat (i32 1) vector unsigned int vui; - --vui; // CHECK: add <4 x i32> {{.*}} + --vui; // CHECK: add <4 x i32> {{.*}} splat (i32 -1) vector float vf; - vf++; // CHECK: fadd <4 x float> {{.*}} + vf++; // CHECK: fadd <4 x float> {{.*}} splat (float 1.000000e+{{0+}}) } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c index 90c28ddd316ee39..91d1ebd045c58ee 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c @@ -92,10 +92,10 @@ void test1() { // CHECK-LE: @llvm.ppc.altivec.vmaxsw vf = vec_abs(vf); -// CHECK: and <4 x i32> {{.*}}, +// CHECK: and <4 x i32> {{.*}}, splat (i32 2147483647) // CHECK: store <4 x float> %{{.*}}, ptr @vf // CHECK-LE: bitcast <4 x float> %{{.*}} to <4 x i32> -// CHECK-LE: and <4 x i32> {{.*}}, +// CHECK-LE: and <4 x i32> {{.*}}, splat (i32 2147483647) // CHECK-LE: bitcast <4 x i32> %{{.*}} to <4 x float> // CHECK-LE: store <4 x float> %{{.*}}, ptr @vf @@ -3502,39 +3502,39 @@ void test6() { /* vec_sl */ res_vsc = vec_sl(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sl(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sl(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sl(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sl(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sl(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vslb(vsc, vuc); @@ -4351,75 +4351,75 @@ void test6() { /* vec_sr */ res_vsc = vec_sr(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sr(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sr(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sr(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sr(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sr(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vsrb(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_vsrb(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_vsrh(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_vsrh(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_vsrw(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_vsrw(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] /* vec_sra */ @@ -5623,21 +5623,21 @@ void test6() { res_vi = vec_sube(vi, vi, vi); // CHECK: and <4 x i32> -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: add <4 x i32> // CHECK: add <4 x i32> // CHECK-LE: and <4 x i32> -// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, +// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK-LE: add <4 x i32> // CHECK-LE: add <4 x i32> res_vui = vec_sube(vui, vui, vui); // CHECK: and <4 x i32> -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: add <4 x i32> // CHECK: add <4 x i32> // CHECK-LE: and <4 x i32> -// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, +// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK-LE: add <4 x i32> // CHECK-LE: add <4 x i32> @@ -9407,7 +9407,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9415,7 +9415,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9423,7 +9423,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9431,7 +9431,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9439,7 +9439,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9447,7 +9447,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9455,7 +9455,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) } @@ -9529,10 +9529,10 @@ void test10() { vector float test_rsqrtf(vector float a, vector float b) { // CHECK-LABEL: test_rsqrtf // CHECK: call fast <4 x float> @llvm.sqrt.v4f32 - // CHECK: fdiv fast <4 x float> + // CHECK: fdiv fast <4 x float> splat (float 1.000000e+00) // CHECK-LE-LABEL: test_rsqrtf // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32 - // CHECK-LE: fdiv fast <4 x float> + // CHECK-LE: fdiv fast <4 x float> splat (float 1.000000e+00) return vec_rsqrt(a); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c index 5c1246c7a1e8165..1089115a4f96275 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c @@ -42,7 +42,7 @@ vector double test_flags_recipdivd() { // CHECK-LABEL: @test_flags_rsqrtf( // CHECK: [[TMP0:%.*]] = load <4 x float>, ptr @a, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> , [[TMP1]] +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @b, align 16 // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP2]] // CHECK-NEXT: ret <4 x float> [[ADD]] @@ -54,7 +54,7 @@ vector float test_flags_rsqrtf() { // CHECK-LABEL: @test_flags_rsqrtd( // CHECK: [[TMP0:%.*]] = load <2 x double>, ptr @d, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> , [[TMP1]] +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @e, align 16 // CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP2]] // CHECK-NEXT: ret <2 x double> [[ADD]] diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c index 151a29976d8ed52..abf6ed3d3b0d7a5 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c @@ -1535,17 +1535,17 @@ vector unsigned long long test_vec_extracth_ul(void) { } vector signed int test_vec_vec_splati_si(void) { - // CHECK: ret <4 x i32> + // CHECK: ret <4 x i32> splat (i32 -17) return vec_splati(-17); } vector unsigned int test_vec_vec_splati_ui(void) { - // CHECK: ret <4 x i32> + // CHECK: ret <4 x i32> splat (i32 16) return vec_splati(16U); } vector float test_vec_vec_splati_f(void) { - // CHECK: ret <4 x float> + // CHECK: ret <4 x float> splat (float 1.000000e+00) return vec_splati(1.0f); } @@ -1863,7 +1863,7 @@ vector bool __int128 test_vec_cmpeq_bool_int128(void) { vector bool __int128 test_vec_cmpne_s128(void) { // CHECK-LABEL: @test_vec_cmpne_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: %not.i = xor <1 x i128> %4, + // CHECK-NEXT: %not.i = xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> %not.i return vec_cmpne(vsi128a, vsi128b); } @@ -1871,7 +1871,7 @@ vector bool __int128 test_vec_cmpne_s128(void) { vector bool __int128 test_vec_cmpne_u128(void) { // CHECK-LABEL: @test_vec_cmpne_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %4, + // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpne(vui128a, vui128b); } @@ -1879,7 +1879,7 @@ vector bool __int128 test_vec_cmpne_u128(void) { vector bool __int128 test_vec_cmpne_bool_int128(void) { // CHECK-LABEL: @test_vec_cmpne_bool_int128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %4, + // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpne(vbi128a, vbi128b); } @@ -1915,7 +1915,7 @@ vector bool __int128 test_vec_cmplt_u128(void) { vector bool __int128 test_vec_cmpge_s128(void) { // CHECK-LABEL: @test_vec_cmpge_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %6, + // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpge(vsi128a, vsi128b); } @@ -1923,7 +1923,7 @@ vector bool __int128 test_vec_cmpge_s128(void) { vector bool __int128 test_vec_cmpge_u128(void) { // CHECK-LABEL: @test_vec_cmpge_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %6, + // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpge(vui128a, vui128b); } @@ -1931,7 +1931,7 @@ vector bool __int128 test_vec_cmpge_u128(void) { vector bool __int128 test_vec_cmple_s128(void) { // CHECK-LABEL: @test_vec_cmple_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %8, + // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmple(vsi128a, vsi128b); } @@ -1939,7 +1939,7 @@ vector bool __int128 test_vec_cmple_s128(void) { vector bool __int128 test_vec_cmple_u128(void) { // CHECK-LABEL: @test_vec_cmple_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %8, + // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmple(vui128a, vui128b); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c index 69ce9d6214e3cce..435465ebbb4be47 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c @@ -675,15 +675,15 @@ void test1() { /* vec_sr */ res_vsll = vec_sr(vsll, vull); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vull = vec_sr(vull, vull); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] /* vec_sra */ @@ -831,299 +831,299 @@ void test1() { /* vec_nand */ res_vsc = vec_nand(vsc, vsc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-PPC: error: call to undeclared function 'vec_nand' res_vbc = vec_nand(vbc, vbc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) res_vuc = vec_nand(vuc, vuc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) res_vss = vec_nand(vss, vss); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vbs = vec_nand(vbs, vbs); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vus = vec_nand(vus, vus); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vsi = vec_nand(vsi, vsi); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vbi = vec_nand(vbi, vbi); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vui = vec_nand(vui, vui); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vf = vec_nand(vfa, vfa); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vsll = vec_nand(vsll, vsll); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vbll = vec_nand(vbll, vbll); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vull = vec_nand(vull, vull); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vd = vec_nand(vda, vda); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vf = vec_nand(vfa, vfa); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) /* vec_orc */ res_vsc = vec_orc(vsc, vsc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] // CHECK-PPC: error: call to undeclared function 'vec_orc' res_vsc = vec_orc(vsc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vsc = vec_orc(vbc, vsc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vuc, vuc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vuc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vbc, vuc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vbc = vec_orc(vbc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vss = vec_orc(vss, vss); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vss = vec_orc(vss, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vss = vec_orc(vbs, vss); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vus, vus); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vus, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vbs, vus); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vbs = vec_orc(vbs, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vsi = vec_orc(vsi, vsi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsi = vec_orc(vsi, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsi = vec_orc(vbi, vsi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vui, vui); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vui, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vbi, vui); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vbi = vec_orc(vbi, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vbi, vfa); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vfa, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vfa, vfb); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsll = vec_orc(vsll, vsll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vsll = vec_orc(vsll, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vsll = vec_orc(vbll, vsll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vull, vull); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vull, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vbll, vull); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vbll = vec_orc(vbll, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vbll, vda); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vda, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vda, vdb); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] /* vec_sub */ @@ -1250,7 +1250,7 @@ vector unsigned int test_vec_addec_unsigned (vector unsigned int a, vector unsig vector signed int test_vec_subec_signed (vector signed int a, vector signed int b, vector signed int c) { return vec_subec(a, b, c); // CHECK-LABEL: @test_vec_subec_signed -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: ret <4 x i32> } @@ -1258,7 +1258,7 @@ vector unsigned int test_vec_subec_unsigned (vector unsigned int a, vector unsig return vec_subec(a, b, c); // CHECK-LABEL: @test_vec_subec_unsigned -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: ret <4 x i32> } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c index 6030899a88357fa..01fa6c6ad17e0c9 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c @@ -216,7 +216,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK_PPC: error: call to 'vec_revb' is ambiguous diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c index 1fe56a820512d07..99524fa2f79d09b 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c @@ -240,11 +240,11 @@ void test1() { res_vd = vec_andc(vbll, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -254,11 +254,11 @@ void test1() { res_vd = vec_andc(vd, vbll); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -267,7 +267,7 @@ void test1() { res_vd = vec_andc(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -927,10 +927,10 @@ void test1() { res_vd = vec_nor(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} -// CHECK-NEXT: xor <2 x i64> [[OR]], +// CHECK-NEXT: xor <2 x i64> [[OR]], splat (i64 -1) // CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} -// CHECK-LE-NEXT: xor <2 x i64> [[OR]], +// CHECK-LE-NEXT: xor <2 x i64> [[OR]], splat (i64 -1) /* vec_or */ res_vsll = vec_or(vsll, vsll); @@ -1037,12 +1037,12 @@ void test1() { // CHECK-LE: call void @dummy() res_vd = vec_sel(vd, vd, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> @@ -1053,73 +1053,73 @@ void test1() { // CHECK-LE: call void @dummy() res_vd = vec_sel(vd, vd, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> res_vbll = vec_sel(vbll, vbll, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vbll = vec_sel(vbll, vbll, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vsll = vec_sel(vsll, vsll, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vsll = vec_sel(vsll, vsll, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vull = vec_sel(vull, vull, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vull = vec_sel(vull, vull, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> @@ -1282,16 +1282,16 @@ void test1() { // CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64> res_vsll = vec_ctsl(vf, 3); - // CHECK: fmul <4 x float> {{%.*}}, + // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float> - // CHECK-LE: fmul <4 x float> {{%.*}}, + // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float> res_vsll = vec_ctsl(vd, 3); - // CHECK: fmul <2 x double> {{%.*}}, + // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK: fptosi <2 x double> {{%.*}} to <2 x i64> - // CHECK-LE: fmul <2 x double> {{%.*}}, + // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK-LE: fptosi <2 x double> {{%.*}} to <2 x i64> res_vull = vec_ctu(vd, 0); @@ -1307,16 +1307,16 @@ void test1() { // CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64> res_vull = vec_ctul(vf, 3); - // CHECK: fmul <4 x float> {{%.*}}, + // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float> - // CHECK-LE: fmul <4 x float> {{%.*}}, + // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float> res_vull = vec_ctul(vd, 3); - // CHECK: fmul <2 x double> {{%.*}}, + // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK: fptoui <2 x double> {{%.*}} to <2 x i64> - // CHECK-LE: fmul <2 x double> {{%.*}}, + // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK-LE: fptoui <2 x double> {{%.*}} to <2 x i64> res_vf = vec_ctf(vsll, 0); @@ -1345,29 +1345,29 @@ void test1() { res_vd = vec_ctd(vsll, 2); // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vull, 2); // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vsi, 2); // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: vperm // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vui, 2); // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: vperm // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vsll = vec_signed(vd); // CHECK: fptosi <2 x double> @@ -1781,7 +1781,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1789,7 +1789,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1797,7 +1797,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1805,7 +1805,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -2352,10 +2352,10 @@ vector double test_recipdivd(vector double a, vector double b) { vector double test_rsqrtd(vector double a, vector double b) { // CHECK-LABEL: test_rsqrtd // CHECK: call fast <2 x double> @llvm.sqrt.v2f64 - // CHECK: fdiv fast <2 x double> + // CHECK: fdiv fast <2 x double> splat (double 1.000000e+00), // CHECK-LE-LABEL: test_rsqrtd // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64 - // CHECK-LE: fdiv fast <2 x double> + // CHECK-LE: fdiv fast <2 x double> splat (double 1.000000e+00) return vec_rsqrt(a); } @@ -2448,7 +2448,7 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2456,7 +2456,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) res_vbll = vec_cmpge(vull, vull); // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2464,14 +2464,14 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() @@ -2516,7 +2516,7 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2524,7 +2524,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) res_vbll = vec_cmple(vull, vull); // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2532,20 +2532,20 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vsll = vec_sl(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2553,7 +2553,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2562,7 +2562,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> res_vull = vec_sl(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2570,7 +2570,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2583,7 +2583,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call void @dummy() res_vsll = vec_sr(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2591,7 +2591,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2600,7 +2600,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> res_vull = vec_sr(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2608,7 +2608,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2621,14 +2621,14 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call void @dummy() res_vsll = vec_sra(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: ashr <2 x i64> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: ashr <2 x i64> res_vull = vec_sra(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: ashr <2 x i64> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: ashr <2 x i64> /* ----------------------- predicates --------------------------- */ diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c index 0d50c91e1250c89..702714a4e786d5d 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c @@ -27,32 +27,32 @@ void test() { res_vf = vec_ctf(vsll, 4); // CHECK: [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvsxdsp(<2 x i64> [[TMP0]]) -// CHECK-NEXT: fmul <4 x float> [[TMP1]], +// CHECK-NEXT: fmul <4 x float> [[TMP1]], splat (float 6.250000e-02) // NOCOMPAT: [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16 // NOCOMPAT-NEXT: [[CONV:%.*]] = sitofp <2 x i64> [[TMP0]] to <2 x double> -// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], splat (double 6.250000e-02) res_vf = vec_ctf(vull, 4); // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvuxdsp(<2 x i64> [[TMP2]]) -// CHECK-NEXT: fmul <4 x float> [[TMP3]], +// CHECK-NEXT: fmul <4 x float> [[TMP3]], splat (float 6.250000e-02) // NOCOMPAT: [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16 // NOCOMPAT-NEXT: [[CONV1:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double> -// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], splat (double 6.250000e-02) res_vsll = vec_cts(vd, 4); // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16 -// CHECK-NEXT: fmul <2 x double> [[TMP4]], +// CHECK-NEXT: fmul <2 x double> [[TMP4]], splat (double 1.600000e+01) // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpsxws(<2 x double> // NOCOMPAT: [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16 -// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], splat (double 1.600000e+01) res_vull = vec_ctu(vd, 4); // CHECK: [[TMP8:%.*]] = load <2 x double>, ptr @vd, align 16 -// CHECK-NEXT: fmul <2 x double> [[TMP8]], +// CHECK-NEXT: fmul <2 x double> [[TMP8]], splat (double 1.600000e+01) // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double> // NOCOMPAT: [[TMP7:%.*]] = load <2 x double>, ptr @vd, align 16 -// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], splat (double 1.600000e+01) res_vd = vec_round(vd); // CHECK: call double @llvm.ppc.readflm() diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c index 4c4d0dfce05eafc..00de972009eb2fd 100644 --- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c @@ -438,7 +438,7 @@ test_converts() { // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd // CHECK: call <2 x i64> @vec_unpackh(int vector[4]) // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double> -// CHECK: fmul <2 x double> %[[CONV]], +// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0) @@ -466,7 +466,7 @@ test_converts() { // CHECK: call <2 x i64> @vec_splats(unsigned long long) // CHECK: call <2 x i64> @vec_unpackl(int vector[4]) // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double> -// CHECK: fmul <2 x double> %[[CONV]], +// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32 // CHECK: call <4 x float> @vec_rint(float vector[4]) @@ -1084,23 +1084,23 @@ test_sll() { // CHECK-LABEL: @test_sll // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) -// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8]) // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef splat (i32 32)) // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0) -// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef ) +// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef splat (i64 64)) // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2]) // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) @@ -1179,21 +1179,21 @@ test_sra() { // CHECK-LABEL: @test_sra // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) -// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32 -// CHECK: store <4 x i32> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 31)) // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 // CHECK: br i1 %[[CMP]] // CHECK: call i1 @llvm.is.constant @@ -1204,7 +1204,7 @@ test_sra() { // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32 -// CHECK: store <4 x i32> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32 // CHECK: br i1 %[[CMP]] // CHECK: call i1 @llvm.is.constant @@ -1230,23 +1230,23 @@ test_srl() { // CHECK-LABEL: @test_srl // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) -// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8]) // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 32)) // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) -// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef ) +// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef splat (i64 64)) // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2]) // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) diff --git a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c index 8681bb3f9b4f8c6..66dfdd48db0ffde 100644 --- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c @@ -218,30 +218,30 @@ test_cmp() { // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) diff --git a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c index 1c6dea34f5c3991..a929c80d5ebe7a6 100644 --- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c +++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c @@ -51,7 +51,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi8_1 = (vector bool char)'a'; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'int' of different size - // XL: + // XL: splat (i8 97) char c = 'c'; vbi8_2 = (vector bool char)c; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' @@ -64,7 +64,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi16_1 = (vector bool short)5; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'int' of different size - // XL: + // XL: splat (i16 5) short si16 = 55; vbi16_2 = (vector bool short)si16; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' @@ -77,7 +77,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi32_1 = (vector bool int)9; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'int' of different size - // XL: + // XL: splat (i32 9) int si32 = 99; vbi32_2 = (vector bool int)si32; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' @@ -90,7 +90,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi64_1 = (vector bool long long)13; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'int' of different size - // XL: + // XL: splat (i64 13) long long si64 = 1313; vbi64_2 = (vector bool long long)si64; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' @@ -103,5 +103,5 @@ void test_vector_bool_pixel_init_no_parentheses(void) { p1 = (vector pixel)1; // MIXED-ERR: error: invalid conversion between vector type '__vector __pixel ' // GCC-ERR: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'int' of different size - // XL: + // XL: splat (i16 1) } diff --git a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c index 5057a4d593daae7..379e8afe1126f86 100644 --- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c +++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c @@ -50,7 +50,7 @@ void test_vector_bool_pixel_init(void) { // vector bool char initialization vbi8_1 = (vector bool char)('a'); // MIXED: - // XL: + // XL: splat (i8 97) // GCC: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'unsigned char' of different size char c = 'c'; vbi8_2 = (vector bool char)(c); @@ -64,7 +64,7 @@ void test_vector_bool_pixel_init(void) { // vector bool short initialization vbi16_1 = (vector bool short)(5); // MIXED: - // XL: + // XL: splat (i16 5) // GCC: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size short si16 = 55; vbi16_2 = (vector bool short)(si16); @@ -78,7 +78,7 @@ void test_vector_bool_pixel_init(void) { // vector bool int initialization vbi32_1 = (vector bool int)(9); // MIXED: - // XL: + // XL: splat (i32 9) // GCC: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'unsigned int' of different size int si32 = 99; vbi32_2 = (vector bool int)(si32); @@ -92,7 +92,7 @@ void test_vector_bool_pixel_init(void) { // vector bool long long initialization vbi64_1 = (vector bool long long)(13); // MIXED: - // XL: + // XL: splat (i64 13) // GCC: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'unsigned long long' of different size long long si64 = 1313; vbi64_2 = (vector bool long long)(si64); @@ -106,6 +106,6 @@ void test_vector_bool_pixel_init(void) { // vector pixel initialization p1 = (vector pixel)(1); // MIXED: - // XL: + // XL: splat (i16 1) // GCC: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size } diff --git a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c index bb97707a7a9a13f..0c81adcfe0b0b0f 100644 --- a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c +++ b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c @@ -331,7 +331,7 @@ fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { // CHECK-LABEL: @not_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -342,7 +342,7 @@ fixed_int8m1_t not_i8(fixed_int8m1_t a) { // CHECK-LABEL: @not_i16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -353,7 +353,7 @@ fixed_int16m1_t not_i16(fixed_int16m1_t a) { // CHECK-LABEL: @not_i32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -364,7 +364,7 @@ fixed_int32m1_t not_i32(fixed_int32m1_t a) { // CHECK-LABEL: @not_i64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -375,7 +375,7 @@ fixed_int64m1_t not_i64(fixed_int64m1_t a) { // CHECK-LABEL: @not_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -386,7 +386,7 @@ fixed_uint8m1_t not_u8(fixed_uint8m1_t a) { // CHECK-LABEL: @not_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -397,7 +397,7 @@ fixed_uint16m1_t not_u16(fixed_uint16m1_t a) { // CHECK-LABEL: @not_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -408,7 +408,7 @@ fixed_uint32m1_t not_u32(fixed_uint32m1_t a) { // CHECK-LABEL: @not_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c index 54a3365e03e32d1..6a1f8f0e923f650 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c @@ -238,19 +238,19 @@ void test_float(void) { // (emulated) vd = vec_ctd(vsl, 1); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 1); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}}) // (emulated) vd = vec_ctd(vsl, 31); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 31); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 0); @@ -260,19 +260,19 @@ void test_float(void) { // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index 33f3dce73baea43..06fc1ee05d67ff9 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -701,32 +701,32 @@ void test_core(void) { vuc = vec_genmask(0x8000); // CHECK: <16 x i8> vuc = vec_genmask(0xffff); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -1) vuc = vec_genmasks_8(0, 7); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -1) vuc = vec_genmasks_8(1, 4); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 120) vuc = vec_genmasks_8(6, 2); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -29) vus = vec_genmasks_16(0, 15); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -1) vus = vec_genmasks_16(2, 11); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 16368) vus = vec_genmasks_16(9, 2); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -8065) vui = vec_genmasks_32(0, 31); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -1) vui = vec_genmasks_32(7, 20); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 33552384) vui = vec_genmasks_32(25, 4); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -134217601) vul = vec_genmasks_64(0, 63); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -1) vul = vec_genmasks_64(3, 40); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 2305843009205305344) vul = vec_genmasks_64(30, 11); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -4503582447501313) vsc = vec_splat(vsc, 0); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer @@ -808,37 +808,37 @@ void test_core(void) { // CHECK-ASM: vrepg vsc = vec_splat_s8(-128); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -128) vsc = vec_splat_s8(127); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 127) vuc = vec_splat_u8(1); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 1) vuc = vec_splat_u8(254); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -2) vss = vec_splat_s16(-32768); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -32768) vss = vec_splat_s16(32767); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 32767) vus = vec_splat_u16(1); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 1) vus = vec_splat_u16(65534); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -2) vsi = vec_splat_s32(-32768); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -32768) vsi = vec_splat_s32(32767); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 32767) vui = vec_splat_u32(-32768); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -32768) vui = vec_splat_u32(32767); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 32767) vsl = vec_splat_s64(-32768); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -32768) vsl = vec_splat_s64(32767); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 32767) vul = vec_splat_u64(-32768); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -32768) vul = vec_splat_u64(32767); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 32767) vsc = vec_splats(sc); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer @@ -4471,19 +4471,19 @@ void test_float(void) { // (emulated) vd = vec_ctd(vsl, 1); // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01) // (emulated) vd = vec_ctd(vul, 1); // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01) // (emulated) vd = vec_ctd(vsl, 31); // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000) // (emulated) vd = vec_ctd(vul, 31); // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000) // (emulated) vsl = vec_ctsl(vd, 0); @@ -4493,19 +4493,19 @@ void test_float(void) { // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64> // (emulated) vsl = vec_ctsl(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00) // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64> // (emulated) vul = vec_ctul(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00) // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64> // (emulated) vsl = vec_ctsl(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000) // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64> // (emulated) vul = vec_ctul(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000) // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64> // (emulated) diff --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c index 2720770624fb67d..4b28e394b33b724 100644 --- a/clang/test/CodeGen/SystemZ/zvector.c +++ b/clang/test/CodeGen/SystemZ/zvector.c @@ -124,31 +124,31 @@ void test_neg(void) { // CHECK-LABEL: define{{.*}} void @test_preinc() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) // CHECK: store volatile <2 x double> [[INC8]], ptr @fd2, align 8 // CHECK: ret void void test_preinc(void) { @@ -170,31 +170,31 @@ void test_preinc(void) { // CHECK-LABEL: define{{.*}} void @test_postinc() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) // CHECK: store volatile <2 x double> [[INC8]], ptr @fd2, align 8 // CHECK: ret void void test_postinc(void) { @@ -216,31 +216,31 @@ void test_postinc(void) { // CHECK-LABEL: define{{.*}} void @test_predec() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00) // CHECK: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8 // CHECK: ret void void test_predec(void) { @@ -262,31 +262,31 @@ void test_predec(void) { // CHECK-LABEL: define{{.*}} void @test_postdec() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00) // CHECK: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8 // CHECK: ret void void test_postdec(void) { @@ -1086,40 +1086,40 @@ void test_rem_assign(void) { // CHECK-LABEL: define{{.*}} void @test_not() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[NEG:%.*]] = xor <16 x i8> [[TMP0]], +// CHECK: [[NEG:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG]], ptr @sc, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], +// CHECK: [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG1]], ptr @uc, align 8 // CHECK: [[TMP2:%.*]] = load volatile <16 x i8>, ptr @bc2, align 8 -// CHECK: [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], +// CHECK: [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG2]], ptr @bc, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], +// CHECK: [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG3]], ptr @ss, align 8 // CHECK: [[TMP4:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], +// CHECK: [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG4]], ptr @us, align 8 // CHECK: [[TMP5:%.*]] = load volatile <8 x i16>, ptr @bs2, align 8 -// CHECK: [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], +// CHECK: [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG5]], ptr @bs, align 8 // CHECK: [[TMP6:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], +// CHECK: [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG6]], ptr @si, align 8 // CHECK: [[TMP7:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], +// CHECK: [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG7]], ptr @ui, align 8 // CHECK: [[TMP8:%.*]] = load volatile <4 x i32>, ptr @bi2, align 8 -// CHECK: [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], +// CHECK: [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG8]], ptr @bi, align 8 // CHECK: [[TMP9:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], +// CHECK: [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG9]], ptr @sl, align 8 // CHECK: [[TMP10:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], +// CHECK: [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG10]], ptr @ul, align 8 // CHECK: [[TMP11:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8 -// CHECK: [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], +// CHECK: [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG11]], ptr @bl, align 8 // CHECK: ret void void test_not(void) { @@ -1932,7 +1932,7 @@ void test_xor_assign(void) { // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHL2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], +// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 @@ -1950,7 +1950,7 @@ void test_xor_assign(void) { // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHL9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], +// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -1968,7 +1968,7 @@ void test_xor_assign(void) { // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHL16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], +// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -1986,7 +1986,7 @@ void test_xor_assign(void) { // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHL23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], +// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2003,7 +2003,7 @@ void test_xor_assign(void) { // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], +// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2020,7 +2020,7 @@ void test_xor_assign(void) { // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], +// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2038,7 +2038,7 @@ void test_xor_assign(void) { // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHL42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], +// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2056,7 +2056,7 @@ void test_xor_assign(void) { // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHL49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], +// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8 // CHECK: ret void void test_sl(void) { @@ -2115,7 +2115,7 @@ void test_sl(void) { // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP5]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHL2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], +// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 @@ -2133,7 +2133,7 @@ void test_sl(void) { // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP12]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHL9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], +// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 @@ -2151,7 +2151,7 @@ void test_sl(void) { // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP19]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHL16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], +// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8 @@ -2169,7 +2169,7 @@ void test_sl(void) { // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP26]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHL23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], +// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8 @@ -2186,7 +2186,7 @@ void test_sl(void) { // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], +// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 @@ -2203,7 +2203,7 @@ void test_sl(void) { // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], +// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 @@ -2221,7 +2221,7 @@ void test_sl(void) { // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP47]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHL42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], +// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 @@ -2239,7 +2239,7 @@ void test_sl(void) { // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP54]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHL49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], +// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8 // CHECK: ret void void test_sl_assign(void) { @@ -2298,7 +2298,7 @@ void test_sl_assign(void) { // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHR2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], +// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 @@ -2316,7 +2316,7 @@ void test_sl_assign(void) { // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHR9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], +// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -2334,7 +2334,7 @@ void test_sl_assign(void) { // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHR16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], +// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -2352,7 +2352,7 @@ void test_sl_assign(void) { // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHR23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], +// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2369,7 +2369,7 @@ void test_sl_assign(void) { // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], +// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2386,7 +2386,7 @@ void test_sl_assign(void) { // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], +// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2404,7 +2404,7 @@ void test_sl_assign(void) { // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHR42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], +// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2422,7 +2422,7 @@ void test_sl_assign(void) { // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHR49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], +// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8 // CHECK: ret void void test_sr(void) { @@ -2481,7 +2481,7 @@ void test_sr(void) { // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP5]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHR2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], +// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 @@ -2499,7 +2499,7 @@ void test_sr(void) { // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP12]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHR9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], +// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 @@ -2517,7 +2517,7 @@ void test_sr(void) { // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP19]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHR16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], +// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8 @@ -2535,7 +2535,7 @@ void test_sr(void) { // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP26]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHR23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], +// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8 @@ -2552,7 +2552,7 @@ void test_sr(void) { // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], +// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 @@ -2569,7 +2569,7 @@ void test_sr(void) { // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], +// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 @@ -2587,7 +2587,7 @@ void test_sr(void) { // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP47]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHR42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], +// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 @@ -2605,7 +2605,7 @@ void test_sr(void) { // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP54]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHR49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], +// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8 // CHECK: ret void void test_sr_assign(void) { diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c index 36cbf228feac869..5b036433519bdd2 100644 --- a/clang/test/CodeGen/SystemZ/zvector2.c +++ b/clang/test/CodeGen/SystemZ/zvector2.c @@ -32,7 +32,7 @@ void test_preinc (void) { // CHECK-LABEL: test_preinc // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00) ++ff2; } @@ -40,7 +40,7 @@ void test_postinc (void) { // CHECK-LABEL: test_postinc // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00) ff2++; } @@ -48,7 +48,7 @@ void test_predec (void) { // CHECK-LABEL: test_predec // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00) --ff2; } @@ -56,7 +56,7 @@ void test_postdec (void) { // CHECK-LABEL: test_postdec // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00) ff2--; } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 4e56204c8ad40fa..9ed6d47e8808a9f 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -53,14 +53,14 @@ __m256 test_mm256_and_ps(__m256 A, __m256 B) { __m256d test_mm256_andnot_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> return _mm256_andnot_pd(A, B); } __m256 test_mm256_andnot_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> return _mm256_andnot_ps(A, B); } diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c index cd94edcf58ea2f4..9935137ca867750 100644 --- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c @@ -274,7 +274,7 @@ __m256bh test_mm256_loadu_pbh(void *p) { __m128bh test_mm_load_sbh(void const *A) { // CHECK-LABEL: test_mm_load_sbh - // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> to <8 x i1>), <8 x bfloat> %{{.*}}) + // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> splat (i8 1) to <8 x i1>), <8 x bfloat> %{{.*}}) return _mm_load_sbh(A); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index a4a3a08efad51bf..27da56fb757230a 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -869,18 +869,18 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epi32 - // CHECK: shl <4 x i64> %{{.*}}, - // CHECK: ashr <4 x i64> %{{.*}}, - // CHECK: shl <4 x i64> %{{.*}}, - // CHECK: ashr <4 x i64> %{{.*}}, + // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epi32(a, b); } __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epu32 - // CHECK: and <4 x i64> %{{.*}}, - // CHECK: and <4 x i64> %{{.*}}, + // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epu32(a, b); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index f82a5d17b292ac8..1d18ca8548a3a3e 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -7,14 +7,14 @@ __mmask32 test_knot_mask32(__mmask32 a) { // CHECK-LABEL: @test_knot_mask32 // CHECK: [[IN:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], splat (i1 true) return _knot_mask32(a); } __mmask64 test_knot_mask64(__mmask64 a) { // CHECK-LABEL: @test_knot_mask64 // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], splat (i1 true) return _knot_mask64(a); } @@ -42,7 +42,7 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kandn_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <32 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu16_mask(_kandn_mask32(_mm512_cmpneq_epu16_mask(__A, __B), _mm512_cmpneq_epu16_mask(__C, __D)), @@ -53,7 +53,7 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kandn_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <64 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu8_mask(_kandn_mask64(_mm512_cmpneq_epu8_mask(__A, __B), _mm512_cmpneq_epu8_mask(__C, __D)), @@ -84,7 +84,7 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kxnor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu16_mask(_kxnor_mask32(_mm512_cmpneq_epu16_mask(__A, __B), _mm512_cmpneq_epu16_mask(__C, __D)), @@ -95,7 +95,7 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kxnor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu8_mask(_kxnor_mask64(_mm512_cmpneq_epu8_mask(__A, __B), _mm512_cmpneq_epu8_mask(__C, __D)), diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index b61c3eb3d54ad16..1ebd3696bce54f6 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -6,7 +6,7 @@ __mmask8 test_knot_mask8(__mmask8 a) { // CHECK-LABEL: @test_knot_mask8 // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], splat (i1 true) return _knot_mask8(a); } @@ -24,7 +24,7 @@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ // CHECK-LABEL: @test_kandn_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <8 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu64_mask(_kandn_mask8(_mm512_cmpneq_epu64_mask(__A, __B), _mm512_cmpneq_epu64_mask(__C, __D)), @@ -45,7 +45,7 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ // CHECK-LABEL: @test_kxnor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu64_mask(_kxnor_mask8(_mm512_cmpneq_epu64_mask(__A, __B), _mm512_cmpneq_epu64_mask(__C, __D)), @@ -388,14 +388,14 @@ __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> return (__m512d) _mm512_andnot_pd(__A, __B); } __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B); @@ -403,7 +403,7 @@ __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m51 __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B); @@ -411,14 +411,14 @@ __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> return (__m512) _mm512_andnot_ps(__A, __B); } __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B); @@ -426,7 +426,7 @@ __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 372790a8cd668be..84e700cfbd3785c 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -440,7 +440,7 @@ __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: test_mm512_knot // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true) // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _mm512_knot(a); } @@ -2844,7 +2844,7 @@ __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ // CHECK-LABEL: test_mm512_maskz_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_andnot_epi32(__k,__A,__B); @@ -2853,7 +2853,7 @@ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { // CHECK-LABEL: test_mm512_mask_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_andnot_epi32(__src,__k,__A,__B); @@ -2863,7 +2863,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_andnot_si512 //CHECK: load {{.*}}%__A.addr.i, align 64 - //CHECK: %not.i = xor{{.*}}, + //CHECK: %not.i = xor{{.*}}, splat (i64 -1) //CHECK: load {{.*}}%__B.addr.i, align 64 //CHECK: and <8 x i64> %not.i,{{.*}} @@ -2872,14 +2872,14 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} return _mm512_andnot_epi32(__A,__B); } __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_andnot_epi64(__k,__A,__B); @@ -2888,7 +2888,7 @@ __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_andnot_epi64(__src,__k,__A,__B); @@ -2896,7 +2896,7 @@ __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} return _mm512_andnot_epi64(__A,__B); } @@ -2987,20 +2987,20 @@ __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epi32(__A,__B); } __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_mul_epi32(__k,__A,__B); @@ -3008,10 +3008,10 @@ __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_mul_epi32(__src,__k,__A,__B); @@ -3019,16 +3019,16 @@ __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512 __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epu32(__A,__B); } __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_mul_epu32(__k,__A,__B); @@ -3036,8 +3036,8 @@ __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_mul_epu32(__src,__k,__A,__B); @@ -8299,7 +8299,7 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ // CHECK-LABEL: test_mm512_kandn // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8405,7 +8405,7 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8427,7 +8427,7 @@ __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __mmask16 test_knot_mask16(__mmask16 a) { // CHECK-LABEL: test_knot_mask16 // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true) // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _knot_mask16(a); } @@ -8447,7 +8447,7 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: test_kandn_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8470,7 +8470,7 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: test_kxnor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index 998177b1e72e54a..1e804c3db57d61c 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -88,121 +88,121 @@ __m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) { __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 7)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shrdi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15); } __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shrdi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15); } __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31)) return _mm512_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 6f544c21e798de9..1c2d467a4742880 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -726,10 +726,10 @@ __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epi32(__W, __M, __X, __Y); @@ -737,10 +737,10 @@ __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epi32(__M, __X, __Y); @@ -750,10 +750,10 @@ __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epi32(__W, __M, __X, __Y); @@ -761,10 +761,10 @@ __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epi32(__M, __X, __Y); @@ -773,8 +773,8 @@ __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epu32(__W, __M, __X, __Y); @@ -782,8 +782,8 @@ __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epu32(__M, __X, __Y); @@ -792,8 +792,8 @@ __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epu32 - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epu32(__W, __M, __X, __Y); @@ -801,8 +801,8 @@ __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epu32 - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epu32(__M, __X, __Y); @@ -880,14 +880,14 @@ __m128i test_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_andnot_epi32 (__m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} return _mm256_andnot_epi32(__A, __B); } __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_mask_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_andnot_epi32(__W, __U, __A, __B); @@ -895,7 +895,7 @@ __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __ __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_maskz_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_andnot_epi32(__U, __A, __B); @@ -903,14 +903,14 @@ __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { __m128i test_mm_andnot_epi32 (__m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} return _mm_andnot_epi32(__A, __B); } __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_mask_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_andnot_epi32(__W, __U, __A, __B); @@ -918,7 +918,7 @@ __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m12 __m128i test_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_maskz_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_andnot_epi32(__U, __A, __B); @@ -1046,14 +1046,14 @@ __m128i test_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_andnot_epi64 (__m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} return _mm256_andnot_epi64(__A, __B); } __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_mask_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_andnot_epi64(__W, __U, __A, __B); @@ -1061,7 +1061,7 @@ __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __ __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_maskz_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_andnot_epi64(__U, __A, __B); @@ -1069,14 +1069,14 @@ __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { __m128i test_mm_andnot_epi64 (__m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} return _mm_andnot_epi64(__A, __B); } __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_mask_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_andnot_epi64(__W,__U, __A, __B); @@ -1084,7 +1084,7 @@ __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m12 __m128i test_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_maskz_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_andnot_epi64(__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index f705bf7aa16f868..cdbd19a91211b6d 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -45,7 +45,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B); @@ -53,7 +53,7 @@ __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m25 __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B); @@ -61,7 +61,7 @@ __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> %{{.*}}, %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B); @@ -69,7 +69,7 @@ __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> %{{.*}}, %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B); @@ -77,7 +77,7 @@ __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B); @@ -85,7 +85,7 @@ __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 _ __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B); @@ -93,7 +93,7 @@ __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B); @@ -101,7 +101,7 @@ __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_andnot_ps (__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index a569293ddffc136..5760c71790debca 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -172,241 +172,241 @@ __m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) { __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)) return _mm256_shldi_epi64(__A, __B, 31); } __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shldi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 15); } __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)) return _mm256_shldi_epi32(__A, __B, 31); } __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 15); } __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shldi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 7); } __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shldi_epi16(__A, __B, 31); } __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 7); } __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shldi_epi16(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31) return _mm256_shrdi_epi64(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shrdi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15); } __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31) return _mm256_shrdi_epi32(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 15); } __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shrdi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7); } __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shrdi_epi16(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 7); } __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 280faa0274cb85d..2bb318177e4d593 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -86,7 +86,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) { __m64 test_mm_andnot_si64(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_andnot_si64 - // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, + // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, splat (i64 -1) // CHECK: and <1 x i64> [[TMP]], {{%.*}} return _mm_andnot_si64(a, b); } @@ -333,8 +333,8 @@ int test_mm_movemask_pi8(__m64 a) { __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32 - // CHECK: and <2 x i64> {{%.*}}, - // CHECK: and <2 x i64> {{%.*}}, + // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) + // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_su32(a, b); } diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index f779ab07a266408..104bfea05469e8a 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -35,7 +35,7 @@ TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m1 __m128 test_mm_andnot_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> return _mm_andnot_ps(A, B); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index c4493a49120543a..adcb6876f7f55d3 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -99,7 +99,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) { __m128d test_mm_andnot_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> return _mm_andnot_pd(A, B); } @@ -107,7 +107,7 @@ TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, __m128i test_mm_andnot_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_andnot_si128 - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> return _mm_andnot_si128(A, B); } @@ -906,8 +906,8 @@ int test_mm_movemask_pd(__m128d A) { __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_mul_epu32 - // CHECK: and <2 x i64> %{{.*}}, - // CHECK: and <2 x i64> %{{.*}}, + // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epu32(A, B); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 340874a4fdd3c40..d71a4b7e789ef86 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -312,10 +312,10 @@ __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { __m128i test_mm_mul_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mul_epi32 - // CHECK: shl <2 x i64> %{{.*}}, - // CHECK: ashr <2 x i64> %{{.*}}, - // CHECK: shl <2 x i64> %{{.*}}, - // CHECK: ashr <2 x i64> %{{.*}}, + // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epi32(x, y); } diff --git a/clang/test/CodeGen/X86/xop-builtins-cmp.c b/clang/test/CodeGen/X86/xop-builtins-cmp.c index af7bc6ce25c4e49..76413df5c8a2017 100644 --- a/clang/test/CodeGen/X86/xop-builtins-cmp.c +++ b/clang/test/CodeGen/X86/xop-builtins-cmp.c @@ -408,48 +408,48 @@ __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) { __m128i test_mm_comtrue_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu8 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu8(a, b); } __m128i test_mm_comtrue_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu16 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu16(a, b); } __m128i test_mm_comtrue_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu32 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu32(a, b); } __m128i test_mm_comtrue_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu64 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu64(a, b); } __m128i test_mm_comtrue_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi8 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi8(a, b); } __m128i test_mm_comtrue_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi16 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi16(a, b); } __m128i test_mm_comtrue_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi32 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi32(a, b); } __m128i test_mm_comtrue_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi64 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi64(a, b); } diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c index 113af58a69339de..8ba6b8bc5915b1c 100644 --- a/clang/test/CodeGen/X86/xop-builtins.c +++ b/clang/test/CodeGen/X86/xop-builtins.c @@ -173,7 +173,7 @@ __m128i test_mm_hsubq_epi32(__m128i a) { __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_cmov_si128 // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, + // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]] // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]] return _mm_cmov_si128(a, b, c); @@ -182,7 +182,7 @@ __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) { // CHECK-LABEL: test_mm256_cmov_si256 // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, + // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]] // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]] return _mm256_cmov_si256(a, b, c); @@ -220,25 +220,25 @@ __m128i test_mm_rot_epi64(__m128i a, __m128i b) { __m128i test_mm_roti_epi8(__m128i a) { // CHECK-LABEL: test_mm_roti_epi8 - // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> ) + // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1)) return _mm_roti_epi8(a, 1); } __m128i test_mm_roti_epi16(__m128i a) { // CHECK-LABEL: test_mm_roti_epi16 - // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50)) return _mm_roti_epi16(a, 50); } __m128i test_mm_roti_epi32(__m128i a) { // CHECK-LABEL: test_mm_roti_epi32 - // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226)) return _mm_roti_epi32(a, -30); } __m128i test_mm_roti_epi64(__m128i a) { // CHECK-LABEL: test_mm_roti_epi64 - // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100)) return _mm_roti_epi64(a, 100); } diff --git a/clang/test/CodeGen/aarch64-neon-3v.c b/clang/test/CodeGen/aarch64-neon-3v.c index 9ed439379722bcd..3580f6d38dc36ad 100644 --- a/clang/test/CodeGen/aarch64-neon-3v.c +++ b/clang/test/CodeGen/aarch64-neon-3v.c @@ -341,7 +341,7 @@ uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { @@ -349,7 +349,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { @@ -357,7 +357,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { @@ -365,7 +365,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { @@ -373,7 +373,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { @@ -381,7 +381,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { @@ -389,7 +389,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { @@ -397,7 +397,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { @@ -405,7 +405,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { @@ -413,7 +413,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { @@ -421,7 +421,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { @@ -429,7 +429,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { @@ -437,7 +437,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { @@ -445,7 +445,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { @@ -453,7 +453,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { @@ -461,7 +461,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { @@ -469,7 +469,7 @@ uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { @@ -477,7 +477,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { @@ -485,7 +485,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { @@ -493,7 +493,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { @@ -501,7 +501,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { @@ -509,7 +509,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { @@ -517,7 +517,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { @@ -525,7 +525,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { @@ -533,7 +533,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { @@ -541,7 +541,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { @@ -549,7 +549,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { @@ -557,7 +557,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { @@ -565,7 +565,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { @@ -573,7 +573,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { @@ -581,7 +581,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { @@ -589,7 +589,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index ef1623bb17003a7..271ae056308d2f4 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -960,7 +960,7 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vbsl_s8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -973,7 +973,7 @@ int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> @@ -987,7 +987,7 @@ int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i32> [[VBSL5_I]] @@ -1000,7 +1000,7 @@ int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -1010,7 +1010,7 @@ int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { // CHECK-LABEL: @test_vbsl_u8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -1023,7 +1023,7 @@ uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i16> [[VBSL5_I]] @@ -1036,7 +1036,7 @@ uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i32> [[VBSL5_I]] @@ -1049,7 +1049,7 @@ uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -1064,7 +1064,7 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> @@ -1080,7 +1080,7 @@ float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> @@ -1091,7 +1091,7 @@ float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { // CHECK-LABEL: @test_vbsl_p8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -1104,7 +1104,7 @@ poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i16> [[VBSL5_I]] @@ -1114,7 +1114,7 @@ poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { // CHECK-LABEL: @test_vbslq_s8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1127,7 +1127,7 @@ int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1140,7 +1140,7 @@ int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i32> [[VBSL5_I]] @@ -1153,7 +1153,7 @@ int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] @@ -1163,7 +1163,7 @@ int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { // CHECK-LABEL: @test_vbslq_u8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1176,7 +1176,7 @@ uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1189,7 +1189,7 @@ uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i32> [[VBSL5_I]] @@ -1202,7 +1202,7 @@ int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] @@ -1217,7 +1217,7 @@ uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> @@ -1228,7 +1228,7 @@ float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { // CHECK-LABEL: @test_vbslq_p8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1241,7 +1241,7 @@ poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1256,7 +1256,7 @@ poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> @@ -4600,7 +4600,7 @@ float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { } // CHECK-LABEL: @test_vshl_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHL_N]] int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 3); @@ -4609,7 +4609,7 @@ int8x8_t test_vshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHL_N]] int16x4_t test_vshl_n_s16(int16x4_t a) { return vshl_n_s16(a, 3); @@ -4618,14 +4618,14 @@ int16x4_t test_vshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHL_N]] int32x2_t test_vshl_n_s32(int32x2_t a) { return vshl_n_s32(a, 3); } // CHECK-LABEL: @test_vshlq_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHL_N]] int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 3); @@ -4634,7 +4634,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHL_N]] int16x8_t test_vshlq_n_s16(int16x8_t a) { return vshlq_n_s16(a, 3); @@ -4643,7 +4643,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHL_N]] int32x4_t test_vshlq_n_s32(int32x4_t a) { return vshlq_n_s32(a, 3); @@ -4652,14 +4652,14 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHL_N]] int64x2_t test_vshlq_n_s64(int64x2_t a) { return vshlq_n_s64(a, 3); } // CHECK-LABEL: @test_vshl_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHL_N]] uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 3); @@ -4668,7 +4668,7 @@ uint8x8_t test_vshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHL_N]] uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 3); @@ -4677,14 +4677,14 @@ uint16x4_t test_vshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHL_N]] uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 3); } // CHECK-LABEL: @test_vshlq_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHL_N]] uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 3); @@ -4693,7 +4693,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHL_N]] uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 3); @@ -4702,7 +4702,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHL_N]] uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 3); @@ -4711,14 +4711,14 @@ uint32x4_t test_vshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHL_N]] uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 3); } // CHECK-LABEL: @test_vshr_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHR_N]] int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 3); @@ -4727,7 +4727,7 @@ int8x8_t test_vshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHR_N]] int16x4_t test_vshr_n_s16(int16x4_t a) { return vshr_n_s16(a, 3); @@ -4736,14 +4736,14 @@ int16x4_t test_vshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHR_N]] int32x2_t test_vshr_n_s32(int32x2_t a) { return vshr_n_s32(a, 3); } // CHECK-LABEL: @test_vshrq_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHR_N]] int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 3); @@ -4752,7 +4752,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHR_N]] int16x8_t test_vshrq_n_s16(int16x8_t a) { return vshrq_n_s16(a, 3); @@ -4761,7 +4761,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHR_N]] int32x4_t test_vshrq_n_s32(int32x4_t a) { return vshrq_n_s32(a, 3); @@ -4770,14 +4770,14 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHR_N]] int64x2_t test_vshrq_n_s64(int64x2_t a) { return vshrq_n_s64(a, 3); } // CHECK-LABEL: @test_vshr_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHR_N]] uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 3); @@ -4786,7 +4786,7 @@ uint8x8_t test_vshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHR_N]] uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 3); @@ -4795,14 +4795,14 @@ uint16x4_t test_vshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHR_N]] uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 3); } // CHECK-LABEL: @test_vshrq_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHR_N]] uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 3); @@ -4811,7 +4811,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHR_N]] uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 3); @@ -4820,7 +4820,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHR_N]] uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 3); @@ -4829,14 +4829,14 @@ uint32x4_t test_vshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHR_N]] uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 3); } // CHECK-LABEL: @test_vsra_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { @@ -4848,7 +4848,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { @@ -4860,7 +4860,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { @@ -4868,7 +4868,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vsraq_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -4880,7 +4880,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -4892,7 +4892,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -4904,7 +4904,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -4912,7 +4912,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vsra_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -4924,7 +4924,7 @@ uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -4936,7 +4936,7 @@ uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -4944,7 +4944,7 @@ uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vsraq_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -4956,7 +4956,7 @@ uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -4968,7 +4968,7 @@ uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -4980,7 +4980,7 @@ uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -4988,7 +4988,7 @@ uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vrshr_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) // CHECK: ret <8 x i8> [[VRSHR_N]] int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 3); @@ -4997,7 +4997,7 @@ int8x8_t test_vrshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vrshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: ret <4 x i16> [[VRSHR_N1]] int16x4_t test_vrshr_n_s16(int16x4_t a) { return vrshr_n_s16(a, 3); @@ -5006,14 +5006,14 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vrshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: ret <2 x i32> [[VRSHR_N1]] int32x2_t test_vrshr_n_s32(int32x2_t a) { return vrshr_n_s32(a, 3); } // CHECK-LABEL: @test_vrshrq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) // CHECK: ret <16 x i8> [[VRSHR_N]] int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 3); @@ -5022,7 +5022,7 @@ int8x16_t test_vrshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: ret <8 x i16> [[VRSHR_N1]] int16x8_t test_vrshrq_n_s16(int16x8_t a) { return vrshrq_n_s16(a, 3); @@ -5031,7 +5031,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: ret <4 x i32> [[VRSHR_N1]] int32x4_t test_vrshrq_n_s32(int32x4_t a) { return vrshrq_n_s32(a, 3); @@ -5040,14 +5040,14 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: ret <2 x i64> [[VRSHR_N1]] int64x2_t test_vrshrq_n_s64(int64x2_t a) { return vrshrq_n_s64(a, 3); } // CHECK-LABEL: @test_vrshr_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) // CHECK: ret <8 x i8> [[VRSHR_N]] uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 3); @@ -5056,7 +5056,7 @@ uint8x8_t test_vrshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vrshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: ret <4 x i16> [[VRSHR_N1]] uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 3); @@ -5065,14 +5065,14 @@ uint16x4_t test_vrshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vrshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: ret <2 x i32> [[VRSHR_N1]] uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 3); } // CHECK-LABEL: @test_vrshrq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) // CHECK: ret <16 x i8> [[VRSHR_N]] uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 3); @@ -5081,7 +5081,7 @@ uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: ret <8 x i16> [[VRSHR_N1]] uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 3); @@ -5090,7 +5090,7 @@ uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: ret <4 x i32> [[VRSHR_N1]] uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 3); @@ -5099,14 +5099,14 @@ uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: ret <2 x i64> [[VRSHR_N1]] uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 3); } // CHECK-LABEL: @test_vrsra_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { @@ -5117,7 +5117,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i16> [[TMP3]] @@ -5129,7 +5129,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i32> [[TMP3]] @@ -5138,7 +5138,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -5149,7 +5149,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <8 x i16> [[TMP3]] @@ -5161,7 +5161,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i32> [[TMP3]] @@ -5173,7 +5173,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i64> [[TMP3]] @@ -5182,7 +5182,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrsra_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -5193,7 +5193,7 @@ uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i16> [[TMP3]] @@ -5205,7 +5205,7 @@ uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i32> [[TMP3]] @@ -5214,7 +5214,7 @@ uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -5225,7 +5225,7 @@ uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <8 x i16> [[TMP3]] @@ -5237,7 +5237,7 @@ uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i32> [[TMP3]] @@ -5249,7 +5249,7 @@ uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i64> [[TMP3]] @@ -5606,7 +5606,7 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { } // CHECK-LABEL: @test_vqshlu_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 3)) // CHECK: ret <8 x i8> [[VQSHLU_N]] uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 3); @@ -5615,7 +5615,7 @@ uint8x8_t test_vqshlu_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshlu_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3)) // CHECK: ret <4 x i16> [[VQSHLU_N1]] uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 3); @@ -5624,14 +5624,14 @@ uint16x4_t test_vqshlu_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshlu_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3)) // CHECK: ret <2 x i32> [[VQSHLU_N1]] uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 3); } // CHECK-LABEL: @test_vqshluq_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 3)) // CHECK: ret <16 x i8> [[VQSHLU_N]] uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 3); @@ -5640,7 +5640,7 @@ uint8x16_t test_vqshluq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshluq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3)) // CHECK: ret <8 x i16> [[VQSHLU_N1]] uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 3); @@ -5649,7 +5649,7 @@ uint16x8_t test_vqshluq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshluq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3)) // CHECK: ret <4 x i32> [[VQSHLU_N1]] uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 3); @@ -5658,7 +5658,7 @@ uint32x4_t test_vqshluq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshluq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3)) // CHECK: ret <2 x i64> [[VQSHLU_N1]] uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 3); @@ -5667,7 +5667,7 @@ uint64x2_t test_vqshluq_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] int8x8_t test_vshrn_n_s16(int16x8_t a) { @@ -5677,7 +5677,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] int16x4_t test_vshrn_n_s32(int32x4_t a) { @@ -5687,7 +5687,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] int32x2_t test_vshrn_n_s64(int64x2_t a) { @@ -5697,7 +5697,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] uint8x8_t test_vshrn_n_u16(uint16x8_t a) { @@ -5707,7 +5707,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] uint16x4_t test_vshrn_n_u32(uint32x4_t a) { @@ -5717,7 +5717,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] uint32x2_t test_vshrn_n_u64(uint64x2_t a) { @@ -5727,7 +5727,7 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vshrn_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] @@ -5738,7 +5738,7 @@ int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshrn_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] @@ -5749,7 +5749,7 @@ int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshrn_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] @@ -5760,7 +5760,7 @@ int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vshrn_high_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] @@ -5771,7 +5771,7 @@ uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vshrn_high_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] @@ -5782,7 +5782,7 @@ uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vshrn_high_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] @@ -6248,7 +6248,7 @@ uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 3); @@ -6258,7 +6258,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 9); @@ -6268,7 +6268,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 19); @@ -6276,7 +6276,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 3); @@ -6286,7 +6286,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 9); @@ -6296,7 +6296,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 19); @@ -6305,7 +6305,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 3); @@ -6316,7 +6316,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 9); @@ -6327,7 +6327,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 19); @@ -6336,7 +6336,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshll_high_n_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 3); @@ -6347,7 +6347,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 9); @@ -6358,7 +6358,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 19); @@ -7066,7 +7066,7 @@ uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { @@ -7077,7 +7077,7 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { @@ -7088,7 +7088,7 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { @@ -7099,7 +7099,7 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { @@ -7110,7 +7110,7 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7121,7 +7121,7 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7132,7 +7132,7 @@ uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7144,7 +7144,7 @@ int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7156,7 +7156,7 @@ int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7168,7 +7168,7 @@ int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7180,7 +7180,7 @@ uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7192,7 +7192,7 @@ uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7326,7 +7326,7 @@ uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { @@ -7337,7 +7337,7 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { @@ -7348,7 +7348,7 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { @@ -7359,7 +7359,7 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { @@ -7370,7 +7370,7 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7381,7 +7381,7 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7392,7 +7392,7 @@ uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7404,7 +7404,7 @@ int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7416,7 +7416,7 @@ int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7428,7 +7428,7 @@ int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7440,7 +7440,7 @@ uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7452,7 +7452,7 @@ uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -13499,7 +13499,7 @@ int64_t test_vshrd_n_s64(int64_t a) { // CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); @@ -13521,7 +13521,7 @@ uint64_t test_vshrd_n_u64_2() { // CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); @@ -13537,7 +13537,7 @@ int64_t test_vrshrd_n_s64(int64_t a) { // CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); @@ -13553,7 +13553,7 @@ uint64_t test_vrshrd_n_u64(uint64_t a) { // CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); @@ -13572,7 +13572,7 @@ int64_t test_vsrad_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { @@ -13598,7 +13598,7 @@ uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -13617,7 +13617,7 @@ int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] @@ -13637,7 +13637,7 @@ uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] @@ -13655,7 +13655,7 @@ int64_t test_vshld_n_s64(int64_t a) { // CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); @@ -13671,7 +13671,7 @@ uint64_t test_vshld_n_u64(uint64_t a) { // CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); @@ -13830,7 +13830,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); @@ -13871,7 +13871,7 @@ uint64_t test_vqshld_n_u64(uint64_t a) { // CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); @@ -13912,7 +13912,7 @@ int64_t test_vqshlud_n_s64(int64_t a) { // CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHLU_N1]] uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c index 759a8d5cb2d8d4c..165f33a9f399fe0 100644 --- a/clang/test/CodeGen/aarch64-neon-misc.c +++ b/clang/test/CodeGen/aarch64-neon-misc.c @@ -1668,98 +1668,98 @@ poly8x16_t test_vcntq_p8(poly8x16_t a) { } // CHECK-LABEL: @test_vmvn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } // CHECK-LABEL: @test_vmvnq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } // CHECK-LABEL: @test_vmvn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } // CHECK-LABEL: @test_vmvnq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } // CHECK-LABEL: @test_vmvn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } // CHECK-LABEL: @test_vmvnq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } // CHECK-LABEL: @test_vmvn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } // CHECK-LABEL: @test_vmvnq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } // CHECK-LABEL: @test_vmvn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } // CHECK-LABEL: @test_vmvnq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } // CHECK-LABEL: @test_vmvn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } // CHECK-LABEL: @test_vmvnq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } // CHECK-LABEL: @test_vmvn_p8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } // CHECK-LABEL: @test_vmvnq_p8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); @@ -2076,7 +2076,7 @@ uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 8); @@ -2086,7 +2086,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 16); @@ -2096,7 +2096,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 32); @@ -2104,7 +2104,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 8); @@ -2114,7 +2114,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 16); @@ -2124,7 +2124,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 32); @@ -2133,7 +2133,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 8); @@ -2144,7 +2144,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 16); @@ -2155,7 +2155,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 32); @@ -2164,7 +2164,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshll_high_n_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 8); @@ -2175,7 +2175,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 16); @@ -2186,7 +2186,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 32); diff --git a/clang/test/CodeGen/aarch64-neon-shifts.c b/clang/test/CodeGen/aarch64-neon-shifts.c index cf1bbef7f8ad7dc..eb169f5290d2625 100644 --- a/clang/test/CodeGen/aarch64-neon-shifts.c +++ b/clang/test/CodeGen/aarch64-neon-shifts.c @@ -7,13 +7,13 @@ uint8x8_t test_shift_vshr(uint8x8_t a) { // CHECK-LABEL: test_shift_vshr - // CHECK: %{{.*}} = lshr <8 x i8> %a, + // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5) return vshr_n_u8(a, 5); } int8x8_t test_shift_vshr_smax(int8x8_t a) { // CHECK-LABEL: test_shift_vshr_smax - // CHECK: %{{.*}} = ashr <8 x i8> %a, + // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7) return vshr_n_s8(a, 8); } @@ -25,14 +25,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) { uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: test_shift_vsra - // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_u8(a, b, 5); } int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { // CHECK-LABEL: test_shift_vsra_smax - // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_s8(a, b, 8); } diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c index 20f3cd8d88919b8..d51f0ae2e11c5f5 100644 --- a/clang/test/CodeGen/aarch64-neon-tbl.c +++ b/clang/test/CodeGen/aarch64-neon-tbl.c @@ -245,10 +245,10 @@ int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -296,10 +296,10 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -725,10 +725,10 @@ uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -776,10 +776,10 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -1205,10 +1205,10 @@ poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -1256,10 +1256,10 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c index 6e8f85557913e07..f3c057ecf48c17a 100644 --- a/clang/test/CodeGen/aarch64-poly64.c +++ b/clang/test/CodeGen/aarch64-poly64.c @@ -42,7 +42,7 @@ uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 { // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -52,7 +52,7 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 { // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] diff --git a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c index 74b543b67bfba42..017c71aa41143bd 100644 --- a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c @@ -370,7 +370,7 @@ fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_COERCE:%.*]] = bitcast [[TMP0:%.*]] to // CHECK-NEXT: [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[A_COERCE]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i8.v8i8( undef, <8 x i8> [[NOT]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-NEXT: ret [[TMP1]] @@ -382,7 +382,7 @@ fixed_bool_t neg_bool(fixed_bool_t a) { // CHECK-LABEL: @neg_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv16i8.v64i8( undef, <64 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -393,7 +393,7 @@ fixed_int8_t neg_i8(fixed_int8_t a) { // CHECK-LABEL: @neg_i16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i16.v32i16( undef, <32 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -404,7 +404,7 @@ fixed_int16_t neg_i16(fixed_int16_t a) { // CHECK-LABEL: @neg_i32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i32.v16i32( undef, <16 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -415,7 +415,7 @@ fixed_int32_t neg_i32(fixed_int32_t a) { // CHECK-LABEL: @neg_i64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i64.v8i64( undef, <8 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -426,7 +426,7 @@ fixed_int64_t neg_i64(fixed_int64_t a) { // CHECK-LABEL: @neg_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv16i8.v64i8( undef, <64 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -437,7 +437,7 @@ fixed_uint8_t neg_u8(fixed_uint8_t a) { // CHECK-LABEL: @neg_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i16.v32i16( undef, <32 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -448,7 +448,7 @@ fixed_uint16_t neg_u16(fixed_uint16_t a) { // CHECK-LABEL: @neg_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i32.v16i32( undef, <16 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -459,7 +459,7 @@ fixed_uint32_t neg_u32(fixed_uint32_t a) { // CHECK-LABEL: @neg_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i64.v8i64( undef, <8 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c index 75fa4a2d8033a1f..4d2ef318005bd31 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c @@ -21,7 +21,7 @@ // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> // CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] // CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> @@ -40,7 +40,7 @@ float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> // CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] // CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> diff --git a/clang/test/CodeGen/arm-bf16-convert-intrinsics.c b/clang/test/CodeGen/arm-bf16-convert-intrinsics.c index 9477ebdb8285af1..e2be98c086853ec 100644 --- a/clang/test/CodeGen/arm-bf16-convert-intrinsics.c +++ b/clang/test/CodeGen/arm-bf16-convert-intrinsics.c @@ -30,7 +30,7 @@ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -43,7 +43,7 @@ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -65,7 +65,7 @@ // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP4]] @@ -83,7 +83,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -97,7 +97,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -138,7 +138,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]] @@ -156,7 +156,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -170,7 +170,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -211,7 +211,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c index bf56c0685bb6e48..8bb97f57e540d0f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -84,7 +84,7 @@ int32x4_t test_vabsq_s32(int32x4_t a) // CHECK-LABEL: @test_vmvnq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vmvnq_s8(int8x16_t a) @@ -98,7 +98,7 @@ int8x16_t test_vmvnq_s8(int8x16_t a) // CHECK-LABEL: @test_vmvnq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vmvnq_s16(int16x8_t a) @@ -112,7 +112,7 @@ int16x8_t test_vmvnq_s16(int16x8_t a) // CHECK-LABEL: @test_vmvnq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vmvnq_s32(int32x4_t a) @@ -126,7 +126,7 @@ int32x4_t test_vmvnq_s32(int32x4_t a) // CHECK-LABEL: @test_vmvnq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vmvnq_u8(uint8x16_t a) @@ -140,7 +140,7 @@ uint8x16_t test_vmvnq_u8(uint8x16_t a) // CHECK-LABEL: @test_vmvnq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vmvnq_u16(uint16x8_t a) @@ -154,7 +154,7 @@ uint16x8_t test_vmvnq_u16(uint16x8_t a) // CHECK-LABEL: @test_vmvnq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vmvnq_u32(uint32x4_t a) @@ -431,9 +431,9 @@ int32x4_t test_vnegq_s32(int32x4_t a) // CHECK-LABEL: @test_vqabsq_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], splat (i8 -128) // CHECK-NEXT: [[TMP2:%.*]] = sub <16 x i8> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> , <16 x i8> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> splat (i8 127), <16 x i8> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[TMP3]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // @@ -449,9 +449,9 @@ int8x16_t test_vqabsq_s8(int8x16_t a) // CHECK-LABEL: @test_vqabsq_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], splat (i16 -32768) // CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i16> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[TMP3]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // @@ -467,9 +467,9 @@ int16x8_t test_vqabsq_s16(int16x8_t a) // CHECK-LABEL: @test_vqabsq_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], splat (i32 -2147483648) // CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[TMP3]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // @@ -484,9 +484,9 @@ int32x4_t test_vqabsq_s32(int32x4_t a) // CHECK-LABEL: @test_vqnegq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], splat (i8 -128) // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> , <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> splat (i8 127), <16 x i8> [[TMP1]] // CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vqnegq_s8(int8x16_t a) @@ -500,9 +500,9 @@ int8x16_t test_vqnegq_s8(int8x16_t a) // CHECK-LABEL: @test_vqnegq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], splat (i16 -32768) // CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> , <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP1]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vqnegq_s16(int16x8_t a) @@ -516,9 +516,9 @@ int16x8_t test_vqnegq_s16(int16x8_t a) // CHECK-LABEL: @test_vqnegq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], splat (i32 -2147483648) // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> , <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP1]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vqnegq_s32(int32x4_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c index b038322bae5b238..10fe02bd421d11d 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vbicq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vbicq_n_s16(int16x8_t a) @@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vbicq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -252) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vbicq_n_s32(int32x4_t a) @@ -36,7 +36,7 @@ int32x4_t test_vbicq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vbicq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -243) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vbicq_n_u16(uint16x8_t a) @@ -50,7 +50,7 @@ uint16x8_t test_vbicq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vbicq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vbicq_n_u32(uint32x4_t a) @@ -64,7 +64,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a) // CHECK-LABEL: @test_vorrq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 195) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vorrq_n_s16(int16x8_t a) @@ -78,7 +78,7 @@ int16x8_t test_vorrq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vorrq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vorrq_n_s32(int32x4_t a) @@ -92,7 +92,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vorrq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vorrq_n_u16(uint16x8_t a) @@ -106,7 +106,7 @@ uint16x8_t test_vorrq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vorrq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 8978432) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vorrq_n_u32(uint32x4_t a) @@ -120,7 +120,7 @@ uint32x4_t test_vorrq_n_u32(uint32x4_t a) // CHECK-LABEL: @test_vmvnq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <8 x i16> +// CHECK-NEXT: ret <8 x i16> splat (i16 27391) // int16x8_t test_vmvnq_n_s16() { @@ -129,7 +129,7 @@ int16x8_t test_vmvnq_n_s16() // CHECK-LABEL: @test_vmvnq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 -5570561) // int32x4_t test_vmvnq_n_s32() { @@ -138,7 +138,7 @@ int32x4_t test_vmvnq_n_s32() // CHECK-LABEL: @test_vmvnq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <8 x i16> +// CHECK-NEXT: ret <8 x i16> splat (i16 -18689) // uint16x8_t test_vmvnq_n_u16() { @@ -147,7 +147,7 @@ uint16x8_t test_vmvnq_n_u16() // CHECK-LABEL: @test_vmvnq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 1023410175) // uint32x4_t test_vmvnq_n_u32() { @@ -158,7 +158,7 @@ uint32x4_t test_vmvnq_n_u32() // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -11265) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -175,7 +175,7 @@ int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -13893633) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -192,7 +192,7 @@ int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -37) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -209,7 +209,7 @@ uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -1644167169) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -226,7 +226,7 @@ uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 13568) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -243,7 +243,7 @@ int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 654311424) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -260,7 +260,7 @@ int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 175) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -277,7 +277,7 @@ uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 89) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -294,7 +294,7 @@ uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -3841), <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p) @@ -310,7 +310,7 @@ int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -18945), <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p) @@ -326,7 +326,7 @@ int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 23295), <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p) @@ -342,7 +342,7 @@ uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -63489), <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p) @@ -358,7 +358,7 @@ uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 767), <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p) @@ -370,7 +370,7 @@ int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -12189697), <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p) @@ -382,7 +382,7 @@ int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -21505), <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p) @@ -394,7 +394,7 @@ uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -4865), <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp index 35af174e1f6f57f..29719614d04fbce 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp +++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp @@ -8,7 +8,7 @@ // CHECK-LABEL: @_Z16test_vbicq_n_s1617__simd128_int16_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vbicq_n_s16(int16x8_t a) @@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a) // CHECK-LABEL: @_Z16test_vbicq_n_u3218__simd128_uint32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vbicq_n_u32(uint32x4_t a) @@ -36,7 +36,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a) // CHECK-LABEL: @_Z16test_vorrq_n_s3217__simd128_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vorrq_n_s32(int32x4_t a) @@ -50,7 +50,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a) // CHECK-LABEL: @_Z16test_vorrq_n_u1618__simd128_uint16_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vorrq_n_u16(uint16x8_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c index dc70647a9c94d9f..fbc63983b73b32f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vbicq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -23,7 +23,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vbicq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -38,7 +38,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vbicq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -55,7 +55,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1) // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP0]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> // CHECK-NEXT: ret <4 x float> [[TMP4]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c index 72a03ed8def3ec5..1f5faf41bfd9013 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vshlq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 5) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshlq_n_s8(int8x16_t a) @@ -22,7 +22,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) // CHECK-LABEL: @test_vshlq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 5) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshlq_n_s16(int16x8_t a) @@ -36,7 +36,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vshlq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 18) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshlq_n_s32(int32x4_t a) @@ -92,7 +92,7 @@ int32x4_t test_vshlq_n_s32_trivial(int32x4_t a) // CHECK-LABEL: @test_vshlq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 3) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vshlq_n_u8(uint8x16_t a) @@ -106,7 +106,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) // CHECK-LABEL: @test_vshlq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 11) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vshlq_n_u16(uint16x8_t a) @@ -120,7 +120,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vshlq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 7) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vshlq_n_u32(uint32x4_t a) @@ -176,7 +176,7 @@ uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a) // CHECK-LABEL: @test_vshrq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 4) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshrq_n_s8(int8x16_t a) @@ -190,7 +190,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) // CHECK-LABEL: @test_vshrq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 10) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshrq_n_s16(int16x8_t a) @@ -204,7 +204,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vshrq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 19) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshrq_n_s32(int32x4_t a) @@ -218,7 +218,7 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vshrq_n_s8_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 7) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) @@ -232,7 +232,7 @@ int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) // CHECK-LABEL: @test_vshrq_n_s16_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 15) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) @@ -246,7 +246,7 @@ int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) // CHECK-LABEL: @test_vshrq_n_s32_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 31) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) @@ -260,7 +260,7 @@ int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) // CHECK-LABEL: @test_vshrq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], splat (i8 1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vshrq_n_u8(uint8x16_t a) @@ -274,7 +274,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) // CHECK-LABEL: @test_vshrq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], splat (i16 10) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vshrq_n_u16(uint16x8_t a) @@ -288,7 +288,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vshrq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], splat (i32 10) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vshrq_n_u32(uint32x4_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c index f8db91cad6b8910..60a0d8835985cc1 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vornq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -23,7 +23,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vornq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <8 x i16> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -38,7 +38,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vornq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -55,7 +55,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1) // CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP0]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> // CHECK-NEXT: ret <4 x float> [[TMP4]] diff --git a/clang/test/CodeGen/arm-neon-shifts.c b/clang/test/CodeGen/arm-neon-shifts.c index c7f5d2611c448c9..129d385abc56d3e 100644 --- a/clang/test/CodeGen/arm-neon-shifts.c +++ b/clang/test/CodeGen/arm-neon-shifts.c @@ -9,13 +9,13 @@ uint8x8_t test_shift_vshr(uint8x8_t a) { // CHECK-LABEL: test_shift_vshr - // CHECK: %{{.*}} = lshr <8 x i8> %a, + // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5) return vshr_n_u8(a, 5); } int8x8_t test_shift_vshr_smax(int8x8_t a) { // CHECK-LABEL: test_shift_vshr_smax - // CHECK: %{{.*}} = ashr <8 x i8> %a, + // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7) return vshr_n_s8(a, 8); } @@ -27,14 +27,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) { uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: test_shift_vsra - // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_u8(a, b, 5); } int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { // CHECK-LABEL: test_shift_vsra_smax - // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_s8(a, b, 8); } diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 93747bc15c6addd..9f43dd2be5af589 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -583,7 +583,7 @@ uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { @@ -594,7 +594,7 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { @@ -605,7 +605,7 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { @@ -616,7 +616,7 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { @@ -627,7 +627,7 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -638,7 +638,7 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -872,7 +872,7 @@ uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vbic_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { @@ -880,7 +880,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: @test_vbic_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { @@ -888,7 +888,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: @test_vbic_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { @@ -896,7 +896,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vbic_s64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { @@ -904,7 +904,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vbic_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { @@ -912,7 +912,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: @test_vbic_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { @@ -920,7 +920,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: @test_vbic_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { @@ -928,7 +928,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vbic_u64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { @@ -936,7 +936,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vbicq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { @@ -944,7 +944,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: @test_vbicq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { @@ -952,7 +952,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: @test_vbicq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { @@ -960,7 +960,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vbicq_s64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { @@ -968,7 +968,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vbicq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { @@ -976,7 +976,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: @test_vbicq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { @@ -984,7 +984,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: @test_vbicq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { @@ -992,7 +992,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vbicq_u64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { @@ -8414,98 +8414,98 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { } // CHECK-LABEL: @test_vmvn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } // CHECK-LABEL: @test_vmvn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } // CHECK-LABEL: @test_vmvn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } // CHECK-LABEL: @test_vmvn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } // CHECK-LABEL: @test_vmvn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } // CHECK-LABEL: @test_vmvn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } // CHECK-LABEL: @test_vmvn_p8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } // CHECK-LABEL: @test_vmvnq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } // CHECK-LABEL: @test_vmvnq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } // CHECK-LABEL: @test_vmvnq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } // CHECK-LABEL: @test_vmvnq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } // CHECK-LABEL: @test_vmvnq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } // CHECK-LABEL: @test_vmvnq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } // CHECK-LABEL: @test_vmvnq_p8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); @@ -8568,7 +8568,7 @@ float32x4_t test_vnegq_f32(float32x4_t a) { } // CHECK-LABEL: @test_vorn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { @@ -8576,7 +8576,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: @test_vorn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { @@ -8584,7 +8584,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: @test_vorn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { @@ -8592,7 +8592,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vorn_s64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { @@ -8600,7 +8600,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vorn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { @@ -8608,7 +8608,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: @test_vorn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { @@ -8616,7 +8616,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: @test_vorn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { @@ -8624,7 +8624,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vorn_u64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { @@ -8632,7 +8632,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vornq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { @@ -8640,7 +8640,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: @test_vornq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { @@ -8648,7 +8648,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: @test_vornq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { @@ -8656,7 +8656,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vornq_s64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { @@ -8664,7 +8664,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vornq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { @@ -8672,7 +8672,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: @test_vornq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { @@ -8680,7 +8680,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: @test_vornq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { @@ -8688,7 +8688,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vornq_u64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { @@ -10200,7 +10200,7 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vqrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRN_N1]] int8x8_t test_vqrshrn_n_s16(int16x8_t a) { return vqrshrn_n_s16(a, 1); @@ -10209,7 +10209,7 @@ int8x8_t test_vqrshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRN_N1]] int16x4_t test_vqrshrn_n_s32(int32x4_t a) { return vqrshrn_n_s32(a, 1); @@ -10218,7 +10218,7 @@ int16x4_t test_vqrshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRN_N1]] int32x2_t test_vqrshrn_n_s64(int64x2_t a) { return vqrshrn_n_s64(a, 1); @@ -10227,7 +10227,7 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRN_N1]] uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { return vqrshrn_n_u16(a, 1); @@ -10236,7 +10236,7 @@ uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRN_N1]] uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { return vqrshrn_n_u32(a, 1); @@ -10245,7 +10245,7 @@ uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRN_N1]] uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { return vqrshrn_n_u64(a, 1); @@ -10254,7 +10254,7 @@ uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqrshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRUN_N1]] uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { return vqrshrun_n_s16(a, 1); @@ -10263,7 +10263,7 @@ uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqrshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRUN_N1]] uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { return vqrshrun_n_s32(a, 1); @@ -10272,7 +10272,7 @@ uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqrshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRUN_N1]] uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { return vqrshrun_n_s64(a, 1); @@ -10427,7 +10427,7 @@ uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqshlu_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1) // CHECK: ret <8 x i8> [[VQSHLU_N]] uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 1); @@ -10436,7 +10436,7 @@ uint8x8_t test_vqshlu_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshlu_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 1) // CHECK: ret <4 x i16> [[VQSHLU_N1]] uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 1); @@ -10445,7 +10445,7 @@ uint16x4_t test_vqshlu_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshlu_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHLU_N1]] uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 1); @@ -10454,14 +10454,14 @@ uint32x2_t test_vqshlu_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHLU_N1]] uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); } // CHECK-LABEL: @test_vqshluq_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHLU_N]] uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 1); @@ -10470,7 +10470,7 @@ uint8x16_t test_vqshluq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshluq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHLU_N1]] uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 1); @@ -10479,7 +10479,7 @@ uint16x8_t test_vqshluq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshluq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHLU_N1]] uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 1); @@ -10488,14 +10488,14 @@ uint32x4_t test_vqshluq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshluq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHLU_N1]] uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 1); } // CHECK-LABEL: @test_vqshl_n_s8( -// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VQSHL_N]] int8x8_t test_vqshl_n_s8(int8x8_t a) { return vqshl_n_s8(a, 1); @@ -10504,7 +10504,7 @@ int8x8_t test_vqshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VQSHL_N1]] int16x4_t test_vqshl_n_s16(int16x4_t a) { return vqshl_n_s16(a, 1); @@ -10513,7 +10513,7 @@ int16x4_t test_vqshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHL_N1]] int32x2_t test_vqshl_n_s32(int32x2_t a) { return vqshl_n_s32(a, 1); @@ -10522,14 +10522,14 @@ int32x2_t test_vqshl_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); } // CHECK-LABEL: @test_vqshl_n_u8( -// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VQSHL_N]] uint8x8_t test_vqshl_n_u8(uint8x8_t a) { return vqshl_n_u8(a, 1); @@ -10538,7 +10538,7 @@ uint8x8_t test_vqshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vqshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VQSHL_N1]] uint16x4_t test_vqshl_n_u16(uint16x4_t a) { return vqshl_n_u16(a, 1); @@ -10547,7 +10547,7 @@ uint16x4_t test_vqshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vqshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHL_N1]] uint32x2_t test_vqshl_n_u32(uint32x2_t a) { return vqshl_n_u32(a, 1); @@ -10556,14 +10556,14 @@ uint32x2_t test_vqshl_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); } // CHECK-LABEL: @test_vqshlq_n_s8( -// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHL_N]] int8x16_t test_vqshlq_n_s8(int8x16_t a) { return vqshlq_n_s8(a, 1); @@ -10572,7 +10572,7 @@ int8x16_t test_vqshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHL_N1]] int16x8_t test_vqshlq_n_s16(int16x8_t a) { return vqshlq_n_s16(a, 1); @@ -10581,7 +10581,7 @@ int16x8_t test_vqshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHL_N1]] int32x4_t test_vqshlq_n_s32(int32x4_t a) { return vqshlq_n_s32(a, 1); @@ -10590,14 +10590,14 @@ int32x4_t test_vqshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHL_N1]] int64x2_t test_vqshlq_n_s64(int64x2_t a) { return vqshlq_n_s64(a, 1); } // CHECK-LABEL: @test_vqshlq_n_u8( -// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHL_N]] uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { return vqshlq_n_u8(a, 1); @@ -10606,7 +10606,7 @@ uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vqshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHL_N1]] uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { return vqshlq_n_u16(a, 1); @@ -10615,7 +10615,7 @@ uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHL_N1]] uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { return vqshlq_n_u32(a, 1); @@ -10624,7 +10624,7 @@ uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHL_N1]] uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { return vqshlq_n_u64(a, 1); @@ -10633,7 +10633,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRN_N1]] int8x8_t test_vqshrn_n_s16(int16x8_t a) { return vqshrn_n_s16(a, 1); @@ -10642,7 +10642,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRN_N1]] int16x4_t test_vqshrn_n_s32(int32x4_t a) { return vqshrn_n_s32(a, 1); @@ -10651,7 +10651,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRN_N1]] int32x2_t test_vqshrn_n_s64(int64x2_t a) { return vqshrn_n_s64(a, 1); @@ -10660,7 +10660,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRN_N1]] uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { return vqshrn_n_u16(a, 1); @@ -10669,7 +10669,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRN_N1]] uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { return vqshrn_n_u32(a, 1); @@ -10678,7 +10678,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRN_N1]] uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { return vqshrn_n_u64(a, 1); @@ -10687,7 +10687,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRUN_N1]] uint8x8_t test_vqshrun_n_s16(int16x8_t a) { return vqshrun_n_s16(a, 1); @@ -10696,7 +10696,7 @@ uint8x8_t test_vqshrun_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRUN_N1]] uint16x4_t test_vqshrun_n_s32(int32x4_t a) { return vqshrun_n_s32(a, 1); @@ -10705,7 +10705,7 @@ uint16x4_t test_vqshrun_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRUN_N1]] uint32x2_t test_vqshrun_n_s64(int64x2_t a) { return vqshrun_n_s64(a, 1); @@ -13296,7 +13296,7 @@ uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VRSHRN_N1]] int8x8_t test_vrshrn_n_s16(int16x8_t a) { return vrshrn_n_s16(a, 1); @@ -13305,7 +13305,7 @@ int8x8_t test_vrshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VRSHRN_N1]] int16x4_t test_vrshrn_n_s32(int32x4_t a) { return vrshrn_n_s32(a, 1); @@ -13314,7 +13314,7 @@ int16x4_t test_vrshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VRSHRN_N1]] int32x2_t test_vrshrn_n_s64(int64x2_t a) { return vrshrn_n_s64(a, 1); @@ -13323,7 +13323,7 @@ int32x2_t test_vrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VRSHRN_N1]] uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { return vrshrn_n_u16(a, 1); @@ -13332,7 +13332,7 @@ uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VRSHRN_N1]] uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { return vrshrn_n_u32(a, 1); @@ -13341,14 +13341,14 @@ uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VRSHRN_N1]] uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { return vrshrn_n_u64(a, 1); } // CHECK-LABEL: @test_vrshr_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VRSHR_N]] int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 1); @@ -13357,7 +13357,7 @@ int8x8_t test_vrshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vrshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VRSHR_N1]] int16x4_t test_vrshr_n_s16(int16x4_t a) { return vrshr_n_s16(a, 1); @@ -13366,7 +13366,7 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vrshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VRSHR_N1]] int32x2_t test_vrshr_n_s32(int32x2_t a) { return vrshr_n_s32(a, 1); @@ -13375,14 +13375,14 @@ int32x2_t test_vrshr_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); } // CHECK-LABEL: @test_vrshr_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VRSHR_N]] uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 1); @@ -13391,7 +13391,7 @@ uint8x8_t test_vrshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vrshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VRSHR_N1]] uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 1); @@ -13400,7 +13400,7 @@ uint16x4_t test_vrshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vrshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VRSHR_N1]] uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 1); @@ -13409,14 +13409,14 @@ uint32x2_t test_vrshr_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); } // CHECK-LABEL: @test_vrshrq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VRSHR_N]] int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 1); @@ -13425,7 +13425,7 @@ int8x16_t test_vrshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VRSHR_N1]] int16x8_t test_vrshrq_n_s16(int16x8_t a) { return vrshrq_n_s16(a, 1); @@ -13434,7 +13434,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VRSHR_N1]] int32x4_t test_vrshrq_n_s32(int32x4_t a) { return vrshrq_n_s32(a, 1); @@ -13443,14 +13443,14 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VRSHR_N1]] int64x2_t test_vrshrq_n_s64(int64x2_t a) { return vrshrq_n_s64(a, 1); } // CHECK-LABEL: @test_vrshrq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VRSHR_N]] uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 1); @@ -13459,7 +13459,7 @@ uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VRSHR_N1]] uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 1); @@ -13468,7 +13468,7 @@ uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VRSHR_N1]] uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 1); @@ -13477,7 +13477,7 @@ uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VRSHR_N1]] uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 1); @@ -13536,7 +13536,7 @@ float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { } // CHECK-LABEL: @test_vrsra_n_s8( -// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { @@ -13548,7 +13548,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> ) +// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i16> [[VRSRA_N]] int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { @@ -13560,7 +13560,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> ) +// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i32> [[VRSRA_N]] int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { @@ -13572,7 +13572,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> ) +// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <1 x i64> [[VRSRA_N]] int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { @@ -13580,7 +13580,7 @@ int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrsra_n_u8( -// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -13592,7 +13592,7 @@ uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> ) +// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i16> [[VRSRA_N]] uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -13604,7 +13604,7 @@ uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> ) +// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i32> [[VRSRA_N]] uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -13616,7 +13616,7 @@ uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> ) +// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <1 x i64> [[VRSRA_N]] uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -13624,7 +13624,7 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vrsraq_n_s8( -// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -13636,7 +13636,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> ) +// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <8 x i16> [[VRSRA_N]] int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -13648,7 +13648,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> ) +// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i32> [[VRSRA_N]] int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -13660,7 +13660,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> ) +// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i64> [[VRSRA_N]] int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -13668,7 +13668,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_u8( -// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -13680,7 +13680,7 @@ uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> ) +// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <8 x i16> [[VRSRA_N]] uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -13692,7 +13692,7 @@ uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> ) +// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i32> [[VRSRA_N]] uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -13704,7 +13704,7 @@ uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> ) +// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i64> [[VRSRA_N]] uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -14107,7 +14107,7 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 1); @@ -14117,7 +14117,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 1); @@ -14127,7 +14127,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 1); @@ -14135,7 +14135,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 1); @@ -14145,7 +14145,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 1); @@ -14155,14 +14155,14 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 1); } // CHECK-LABEL: @test_vshl_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHL_N]] int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 1); @@ -14171,7 +14171,7 @@ int8x8_t test_vshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHL_N]] int16x4_t test_vshl_n_s16(int16x4_t a) { return vshl_n_s16(a, 1); @@ -14180,7 +14180,7 @@ int16x4_t test_vshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHL_N]] int32x2_t test_vshl_n_s32(int32x2_t a) { return vshl_n_s32(a, 1); @@ -14189,14 +14189,14 @@ int32x2_t test_vshl_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); } // CHECK-LABEL: @test_vshl_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHL_N]] uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 1); @@ -14205,7 +14205,7 @@ uint8x8_t test_vshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHL_N]] uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 1); @@ -14214,7 +14214,7 @@ uint16x4_t test_vshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHL_N]] uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 1); @@ -14223,14 +14223,14 @@ uint32x2_t test_vshl_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); } // CHECK-LABEL: @test_vshlq_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHL_N]] int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 1); @@ -14239,7 +14239,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHL_N]] int16x8_t test_vshlq_n_s16(int16x8_t a) { return vshlq_n_s16(a, 1); @@ -14248,7 +14248,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHL_N]] int32x4_t test_vshlq_n_s32(int32x4_t a) { return vshlq_n_s32(a, 1); @@ -14257,14 +14257,14 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHL_N]] int64x2_t test_vshlq_n_s64(int64x2_t a) { return vshlq_n_s64(a, 1); } // CHECK-LABEL: @test_vshlq_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHL_N]] uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 1); @@ -14273,7 +14273,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHL_N]] uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 1); @@ -14282,7 +14282,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHL_N]] uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 1); @@ -14291,7 +14291,7 @@ uint32x4_t test_vshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHL_N]] uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 1); @@ -14300,7 +14300,7 @@ uint64x2_t test_vshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] int8x8_t test_vshrn_n_s16(int16x8_t a) { @@ -14310,7 +14310,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] int16x4_t test_vshrn_n_s32(int32x4_t a) { @@ -14320,7 +14320,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] int32x2_t test_vshrn_n_s64(int64x2_t a) { @@ -14330,7 +14330,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] uint8x8_t test_vshrn_n_u16(uint16x8_t a) { @@ -14340,7 +14340,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] uint16x4_t test_vshrn_n_u32(uint32x4_t a) { @@ -14350,7 +14350,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] uint32x2_t test_vshrn_n_u64(uint64x2_t a) { @@ -14358,7 +14358,7 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) { } // CHECK-LABEL: @test_vshr_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHR_N]] int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 1); @@ -14367,7 +14367,7 @@ int8x8_t test_vshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHR_N]] int16x4_t test_vshr_n_s16(int16x4_t a) { return vshr_n_s16(a, 1); @@ -14376,7 +14376,7 @@ int16x4_t test_vshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHR_N]] int32x2_t test_vshr_n_s32(int32x2_t a) { return vshr_n_s32(a, 1); @@ -14385,14 +14385,14 @@ int32x2_t test_vshr_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); } // CHECK-LABEL: @test_vshr_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHR_N]] uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 1); @@ -14401,7 +14401,7 @@ uint8x8_t test_vshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHR_N]] uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 1); @@ -14410,7 +14410,7 @@ uint16x4_t test_vshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHR_N]] uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 1); @@ -14419,14 +14419,14 @@ uint32x2_t test_vshr_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); } // CHECK-LABEL: @test_vshrq_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHR_N]] int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 1); @@ -14435,7 +14435,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHR_N]] int16x8_t test_vshrq_n_s16(int16x8_t a) { return vshrq_n_s16(a, 1); @@ -14444,7 +14444,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHR_N]] int32x4_t test_vshrq_n_s32(int32x4_t a) { return vshrq_n_s32(a, 1); @@ -14453,14 +14453,14 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHR_N]] int64x2_t test_vshrq_n_s64(int64x2_t a) { return vshrq_n_s64(a, 1); } // CHECK-LABEL: @test_vshrq_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHR_N]] uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 1); @@ -14469,7 +14469,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHR_N]] uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 1); @@ -14478,7 +14478,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHR_N]] uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 1); @@ -14487,14 +14487,14 @@ uint32x4_t test_vshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHR_N]] uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 1); } // CHECK-LABEL: @test_vsli_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { return vsli_n_s8(a, b, 1); @@ -14505,7 +14505,7 @@ int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { return vsli_n_s16(a, b, 1); @@ -14516,7 +14516,7 @@ int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VSLI_N2]] int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { return vsli_n_s32(a, b, 1); @@ -14527,14 +14527,14 @@ int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VSLI_N2]] int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { return vsli_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsli_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { return vsli_n_u8(a, b, 1); @@ -14545,7 +14545,7 @@ uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { return vsli_n_u16(a, b, 1); @@ -14556,7 +14556,7 @@ uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VSLI_N2]] uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { return vsli_n_u32(a, b, 1); @@ -14567,14 +14567,14 @@ uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VSLI_N2]] uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { return vsli_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsli_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { return vsli_n_p8(a, b, 1); @@ -14585,14 +14585,14 @@ poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { return vsli_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { return vsliq_n_s8(a, b, 1); @@ -14603,7 +14603,7 @@ int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { return vsliq_n_s16(a, b, 1); @@ -14614,7 +14614,7 @@ int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VSLI_N2]] int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { return vsliq_n_s32(a, b, 1); @@ -14625,14 +14625,14 @@ int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VSLI_N2]] int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { return vsliq_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { return vsliq_n_u8(a, b, 1); @@ -14643,7 +14643,7 @@ uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { return vsliq_n_u16(a, b, 1); @@ -14654,7 +14654,7 @@ uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VSLI_N2]] uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { return vsliq_n_u32(a, b, 1); @@ -14665,14 +14665,14 @@ uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VSLI_N2]] uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { return vsliq_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { return vsliq_n_p8(a, b, 1); @@ -14683,14 +14683,14 @@ poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { return vsliq_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsra_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { @@ -14702,7 +14702,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { @@ -14714,7 +14714,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { @@ -14726,7 +14726,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { @@ -14734,7 +14734,7 @@ int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vsra_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -14746,7 +14746,7 @@ uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -14758,7 +14758,7 @@ uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -14770,7 +14770,7 @@ uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -14778,7 +14778,7 @@ uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vsraq_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -14790,7 +14790,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -14802,7 +14802,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -14814,7 +14814,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -14822,7 +14822,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vsraq_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -14834,7 +14834,7 @@ uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -14846,7 +14846,7 @@ uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -14858,7 +14858,7 @@ uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -14866,7 +14866,7 @@ uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vsri_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { return vsri_n_s8(a, b, 1); @@ -14877,7 +14877,7 @@ int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { return vsri_n_s16(a, b, 1); @@ -14888,7 +14888,7 @@ int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VSLI_N2]] int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { return vsri_n_s32(a, b, 1); @@ -14899,14 +14899,14 @@ int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VSLI_N2]] int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { return vsri_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsri_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { return vsri_n_u8(a, b, 1); @@ -14917,7 +14917,7 @@ uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { return vsri_n_u16(a, b, 1); @@ -14928,7 +14928,7 @@ uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VSLI_N2]] uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { return vsri_n_u32(a, b, 1); @@ -14939,14 +14939,14 @@ uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VSLI_N2]] uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { return vsri_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsri_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { return vsri_n_p8(a, b, 1); @@ -14957,14 +14957,14 @@ poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { return vsri_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { return vsriq_n_s8(a, b, 1); @@ -14975,7 +14975,7 @@ int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { return vsriq_n_s16(a, b, 1); @@ -14986,7 +14986,7 @@ int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VSLI_N2]] int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { return vsriq_n_s32(a, b, 1); @@ -14997,14 +14997,14 @@ int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VSLI_N2]] int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { return vsriq_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { return vsriq_n_u8(a, b, 1); @@ -15015,7 +15015,7 @@ uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { return vsriq_n_u16(a, b, 1); @@ -15026,7 +15026,7 @@ uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VSLI_N2]] uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { return vsriq_n_u32(a, b, 1); @@ -15037,14 +15037,14 @@ uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VSLI_N2]] uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { return vsriq_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { return vsriq_n_p8(a, b, 1); @@ -15055,7 +15055,7 @@ poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { return vsriq_n_p16(a, b, 1); @@ -18620,7 +18620,7 @@ uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { @@ -18631,7 +18631,7 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { @@ -18642,7 +18642,7 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { @@ -18653,7 +18653,7 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { @@ -18664,7 +18664,7 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -18675,7 +18675,7 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 2850125a54c44a0..748b5e4add7c768 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -962,7 +962,7 @@ void test_builtin_elementwise_copysign(float f1, float f2, double d1, double d2, f1 = __builtin_elementwise_copysign(2.0f, f1); // CHECK: [[V2F64:%.+]] = load <2 x double>, ptr %v2f64.addr, align 16 - // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[V2F64]]) + // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 1.000000e+00), <2 x double> [[V2F64]]) v2f64 = __builtin_elementwise_copysign((double2)1.0, v2f64); } @@ -1035,7 +1035,7 @@ void test_builtin_elementwise_fma(float f32, double f64, // CHECK: [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr - // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> ) + // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> splat (half 0xH4400)) half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0); } diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 3406cbdde2bf880..163aee4799ff0e3 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -999,13 +999,13 @@ __device__ void nvvm_cvt_sm89() { // CHECK_PTX81_SM89: call i16 @llvm.nvvm.ff.to.e5m2x2.rn.relu(float 1.000000e+00, float 1.000000e+00) __nvvm_ff_to_e5m2x2_rn_relu(1.0f, 1.0f); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e4m3x2_rn({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e4m3x2_rn_relu({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e5m2x2_rn({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e5m2x2_rn_relu({1.0f16, 1.0f16}); // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e4m3x2.to.f16x2.rn(i16 18504) @@ -1035,12 +1035,12 @@ __device__ void nvvm_abs_neg_bf16_bf16x2_sm80() { // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.abs.bf16(bfloat 0xR3DCD) __nvvm_abs_bf16(BF16); - // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> ) + // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD)) __nvvm_abs_bf16x2(BF16X2); // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.neg.bf16(bfloat 0xR3DCD) __nvvm_neg_bf16(BF16); - // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> ) + // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD)) __nvvm_neg_bf16x2(BF16X2); #endif // CHECK: ret void diff --git a/clang/test/CodeGen/builtinshufflevector2.c b/clang/test/CodeGen/builtinshufflevector2.c index da957503f86b652..3d3ababd167c4f8 100644 --- a/clang/test/CodeGen/builtinshufflevector2.c +++ b/clang/test/CodeGen/builtinshufflevector2.c @@ -5,7 +5,7 @@ typedef unsigned int uint4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v( void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) { -// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, +// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, splat (i32 3) // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0 // CHECK: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i{{[0-9]+}} [[I]] // diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index fc973cb983a80a8..175d221ad410a65 100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -139,7 +139,7 @@ void g28(void) { typedef long long v1i64 __attribute((vector_size(8))); typedef short v12i16 __attribute((vector_size(24))); typedef long double v2f80 __attribute((vector_size(24))); - // CHECK: @g28.a = internal global <1 x i64> + // CHECK: @g28.a = internal global <1 x i64> splat (i64 10) // @g28.b = internal global <12 x i16> // @g28.c = internal global <2 x x86_fp80> , align 32 static v1i64 a = (v1i64)10LL; diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c index c5886858515483b..dde9857921cd127 100644 --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -790,7 +790,7 @@ void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -804,7 +804,7 @@ void multiply_float_matrix_constant(fx2x3_t a) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void void multiply_compound_float_matrix_constant(fx2x3_t a) { @@ -817,7 +817,7 @@ void multiply_compound_float_matrix_constant(fx2x3_t a) { // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> , [[MAT]] +// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -831,7 +831,7 @@ void multiply_int_matrix_constant(ix9x3_t a) { // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5) // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -939,7 +939,7 @@ void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGen/neon-immediate-ubsan.c b/clang/test/CodeGen/neon-immediate-ubsan.c index ffe94d4f346fd9e..9e914995373ffe1 100644 --- a/clang/test/CodeGen/neon-immediate-ubsan.c +++ b/clang/test/CodeGen/neon-immediate-ubsan.c @@ -21,6 +21,6 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqrshrn_n_s64 // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> {{.*}}, i32 1) - // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> ) + // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> splat (i64 -1)) return vqrshrn_n_s64(a, 0 + 1); } diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index da7cdff0b1a46b7..d9b34c8e383f99c 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -103,7 +103,7 @@ float defined_func_f32(float a, float b, float c) { // CFINITEONLY-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // CFINITEONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // CFINITEONLY-NEXT: [[TMP3:%.*]] = call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// CFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], +// CFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], splat (double 4.000000e+00) // CFINITEONLY-NEXT: ret <2 x double> [[ADD]] // // CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) @@ -111,7 +111,7 @@ float defined_func_f32(float a, float b, float c) { // CLFINITEONLY-SAME: (<2 x double> noundef nofpclass(nan inf) [[A:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], <2 x double> noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CLFINITEONLY-NEXT: entry: // CLFINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[A]], <2 x double> [[B]], <2 x double> [[C]]) -// CLFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], +// CLFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], splat (double 4.000000e+00) // CLFINITEONLY-NEXT: ret <2 x double> [[ADD]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -128,7 +128,7 @@ float defined_func_f32(float a, float b, float c) { // NONANS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // NONANS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // NONANS-NEXT: [[TMP3:%.*]] = call nnan <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// NONANS-NEXT: [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], +// NONANS-NEXT: [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], splat (double 4.000000e+00) // NONANS-NEXT: ret <2 x double> [[ADD]] // // NOINFS: Function Attrs: noinline nounwind optnone @@ -145,7 +145,7 @@ float defined_func_f32(float a, float b, float c) { // NOINFS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // NOINFS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // NOINFS-NEXT: [[TMP3:%.*]] = call ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// NOINFS-NEXT: [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], +// NOINFS-NEXT: [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], splat (double 4.000000e+00) // NOINFS-NEXT: ret <2 x double> [[ADD]] // double2 defined_func_v2f64(double2 a, double2 b, double2 c) { diff --git a/clang/test/CodeGen/ppc-vec_ct-truncate.c b/clang/test/CodeGen/ppc-vec_ct-truncate.c index 42542edfad71c9b..e3a0f11618759c7 100644 --- a/clang/test/CodeGen/ppc-vec_ct-truncate.c +++ b/clang/test/CodeGen/ppc-vec_ct-truncate.c @@ -30,11 +30,11 @@ void test(void) { res_vsi = vec_cts(a1, 31); // CHECK: [[TMP0:%.*]] = load <2 x double>, ptr @a1, align 16 - // CHECK-NEXT: fmul <2 x double> [[TMP0]], + // CHECK-NEXT: fmul <2 x double> [[TMP0]], splat (double 0x41E0000000000000) res_vsi = vec_cts(a1, 500); // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr @a1, align 16 - // CHECK-NEXT: fmul <2 x double> [[TMP4]], + // CHECK-NEXT: fmul <2 x double> [[TMP4]], splat (double 0x4130000000000000) res_vsi = vec_ctu(vf1, 31); // CHECK: [[TMP8:%.*]] = load <4 x float>, ptr @vf1, align 16 @@ -46,19 +46,19 @@ void test(void) { res_vull = vec_ctul(vf1, 31); // CHECK: [[TMP12:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP12]], + // CHECK-NEXT: fmul <4 x float> [[TMP12]], splat (float 0x41E0000000000000) res_vull = vec_ctul(vf1, 500); // CHECK: [[TMP21:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP21]], + // CHECK-NEXT: fmul <4 x float> [[TMP21]], splat (float 0x4130000000000000) res_vsll = vec_ctsl(vf1, 31); // CHECK: [[TMP30:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP30]], + // CHECK-NEXT: fmul <4 x float> [[TMP30]], splat (float 0x41E0000000000000) res_vsll = vec_ctsl(vf1, 500); // CHECK: [[TMP39:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP39]], + // CHECK-NEXT: fmul <4 x float> [[TMP39]], splat (float 0x4130000000000000) res_vf = vec_ctf(vsi1, 31); // CHECK: [[TMP48:%.*]] = load <4 x i32>, ptr @vsi1, align 16 @@ -71,10 +71,10 @@ void test(void) { res_vd = vec_ctd(vsi1, 31); // CHECK: [[TMP53:%.*]] = load <4 x i32>, ptr @vsi1, align 16 // CHECK: [[TMP83:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP82:%.*]]) - // CHECK-NEXT: fmul <2 x double> [[TMP83]], + // CHECK-NEXT: fmul <2 x double> [[TMP83]], splat (double 0x3E00000000000000) res_vd = vec_ctd(vsi1, 500); // CHECK: [[TMP84:%.*]] = load <4 x i32>, ptr @vsi1, align 16 // CHECK: [[TMP115:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP114:%.*]]) - // CHECK-NEXT: fmul <2 x double> [[TMP115]], + // CHECK-NEXT: fmul <2 x double> [[TMP115]], splat (double 0x3EB0000000000000) } diff --git a/clang/test/CodeGen/variadic-nvptx.c b/clang/test/CodeGen/variadic-nvptx.c index 4e4fc5ecdef65ed..4c9776342c03e36 100644 --- a/clang/test/CodeGen/variadic-nvptx.c +++ b/clang/test/CodeGen/variadic-nvptx.c @@ -39,7 +39,7 @@ extern void varargs_simple(int, ...); // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[A]], i32 0, i32 2 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK-NEXT: call void (i32, ...) @varargs_simple(i32 noundef 0, i32 [[TMP7]], i8 [[TMP9]], i32 [[TMP11]]) #[[ATTR3]] -// CHECK-NEXT: store <4 x i32> , ptr [[V]], align 16 +// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[V]], align 16 // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr [[V]], align 16 // CHECK-NEXT: call void (i32, ...) @varargs_simple(i32 noundef 0, <4 x i32> noundef [[TMP12]]) #[[ATTR3]] // CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[T]], i32 0, i32 0 diff --git a/clang/test/CodeGen/vecshift.c b/clang/test/CodeGen/vecshift.c index 3cb772a6feb404b..23cb0225f11e22c 100644 --- a/clang/test/CodeGen/vecshift.c +++ b/clang/test/CodeGen/vecshift.c @@ -51,22 +51,22 @@ vector_uint4 vui4; void foo(void) { vc8 = 1 << vc8; // CHECK: [[t0:%.+]] = load <8 x i8>, ptr {{@.+}}, -// CHECK: shl <8 x i8> , [[t0]] +// CHECK: shl <8 x i8> splat (i8 1), [[t0]] vuc8 = 1 << vuc8; // CHECK: [[t1:%.+]] = load <8 x i8>, ptr {{@.+}}, -// CHECK: shl <8 x i8> , [[t1]] +// CHECK: shl <8 x i8> splat (i8 1), [[t1]] vi8 = 1 << vi8; // CHECK: [[t2:%.+]] = load <8 x i32>, ptr {{@.+}}, -// CHECK: shl <8 x i32> , [[t2]] +// CHECK: shl <8 x i32> splat (i32 1), [[t2]] vui8 = 1 << vui8; // CHECK: [[t3:%.+]] = load <8 x i32>, ptr {{@.+}}, -// CHECK: shl <8 x i32> , [[t3]] +// CHECK: shl <8 x i32> splat (i32 1), [[t3]] vs8 = 1 << vs8; // CHECK: [[t4:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: shl <8 x i16> , [[t4]] +// CHECK: shl <8 x i16> splat (i16 1), [[t4]] vus8 = 1 << vus8; // CHECK: [[t5:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: shl <8 x i16> , [[t5]] +// CHECK: shl <8 x i16> splat (i16 1), [[t5]] vc8 = c << vc8; // CHECK: [[t6:%.+]] = load i8, ptr @c, @@ -104,7 +104,7 @@ void foo(void) { // CHECK: shl <8 x i16> [[splat_splat20]], [[t15]] vus8 = 1 << vus8; // CHECK: [[t16:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: [[shl22:%.+]] = shl <8 x i16> , [[t16]] +// CHECK: [[shl22:%.+]] = shl <8 x i16> splat (i16 1), [[t16]] vc8 = vc8 << vc8; // CHECK: [[t17:%.+]] = load <8 x i8>, ptr {{@.+}}, diff --git a/clang/test/CodeGen/vector-scalar.c b/clang/test/CodeGen/vector-scalar.c index 0c973cd41ee44bf..f115bae0f75830c 100644 --- a/clang/test/CodeGen/vector-scalar.c +++ b/clang/test/CodeGen/vector-scalar.c @@ -5,28 +5,28 @@ typedef unsigned char uchar4 __attribute__ ((vector_size (4))); // CHECK: @add2 -// CHECK: add <4 x i8> {{.*}}, +// CHECK: add <4 x i8> {{.*}}, splat (i8 2) uchar4 add2(uchar4 v) { return v + 2; } // CHECK: @sub2 -// CHECK: sub <4 x i8> {{.*}}, +// CHECK: sub <4 x i8> {{.*}}, splat (i8 2) uchar4 sub2(uchar4 v) { return v - 2; } // CHECK: @mul2 -// CHECK: mul <4 x i8> {{.*}}, +// CHECK: mul <4 x i8> {{.*}}, splat (i8 2) uchar4 mul2(uchar4 v) { return v * 2; } // CHECK: @div2 -// CHECK: udiv <4 x i8> {{.*}}, +// CHECK: udiv <4 x i8> {{.*}}, splat (i8 2) uchar4 div2(uchar4 v) { return v / 2; @@ -35,7 +35,7 @@ uchar4 div2(uchar4 v) typedef __attribute__(( ext_vector_type(4) )) unsigned char uchar4_ext; // CHECK: @div3_ext -// CHECK: udiv <4 x i8> %{{.*}}, +// CHECK: udiv <4 x i8> %{{.*}}, splat (i8 3) uchar4_ext div3_ext(uchar4_ext v) { return v / 3; diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index c92a970b42f60fe..7c323c2d368c0ec 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -1581,7 +1581,7 @@ TEST_UNINIT(intvec16, int __attribute__((vector_size(16)))); // CHECK: %uninit = alloca <4 x i32>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_intvec16_uninit() -// PATTERN: store <4 x i32> , ptr %uninit, align 16, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x i32> splat (i32 [[I32]]), ptr %uninit, align 16, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_intvec16_uninit() // ZERO: store <4 x i32> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]] @@ -1595,7 +1595,7 @@ TEST_BRACES(intvec16, int __attribute__((vector_size(16)))); TEST_CUSTOM(intvec16, int __attribute__((vector_size(16))), { 0x44444444, 0x44444444, 0x44444444, 0x44444444 }); // CHECK-LABEL: @test_intvec16_custom() // CHECK: %custom = alloca <4 x i32>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x i32> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x i32> splat (i32 1145324612), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1604,7 +1604,7 @@ TEST_UNINIT(longlongvec32, long long __attribute__((vector_size(32)))); // CHECK: %uninit = alloca <4 x i64>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_longlongvec32_uninit() -// PATTERN: store <4 x i64> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x i64> splat (i64 [[I64]]), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_longlongvec32_uninit() // ZERO: store <4 x i64> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1618,7 +1618,7 @@ TEST_BRACES(longlongvec32, long long __attribute__((vector_size(32)))); TEST_CUSTOM(longlongvec32, long long __attribute__((vector_size(32))), { 0x3333333333333333, 0x3333333333333333, 0x3333333333333333, 0x3333333333333333 }); // CHECK-LABEL: @test_longlongvec32_custom() // CHECK: %custom = alloca <4 x i64>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x i64> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x i64> splat (i64 3689348814741910323), ptr %custom, align [[ALIGN]] // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(floatvec16, float __attribute__((vector_size(16)))); @@ -1626,7 +1626,7 @@ TEST_UNINIT(floatvec16, float __attribute__((vector_size(16)))); // CHECK: %uninit = alloca <4 x float>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_floatvec16_uninit() -// PATTERN: store <4 x float> , ptr %uninit, align 16, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x float> splat (float 0xFFFFFFFFE0000000), ptr %uninit, align 16, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_floatvec16_uninit() // ZERO: store <4 x float> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]] @@ -1640,7 +1640,7 @@ TEST_BRACES(floatvec16, float __attribute__((vector_size(16)))); TEST_CUSTOM(floatvec16, float __attribute__((vector_size(16))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 }); // CHECK-LABEL: @test_floatvec16_custom() // CHECK: %custom = alloca <4 x float>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x float> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x float> splat (float 0x400921FB60000000), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1649,7 +1649,7 @@ TEST_UNINIT(doublevec32, double __attribute__((vector_size(32)))); // CHECK: %uninit = alloca <4 x double>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_doublevec32_uninit() -// PATTERN: store <4 x double> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_doublevec32_uninit() // ZERO: store <4 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1663,7 +1663,7 @@ TEST_BRACES(doublevec32, double __attribute__((vector_size(32)))); TEST_CUSTOM(doublevec32, double __attribute__((vector_size(32))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 }); // CHECK-LABEL: @test_doublevec32_custom() // CHECK: %custom = alloca <4 x double>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x double> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x double> splat (double 0x400921FB54442D18), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1673,7 +1673,7 @@ TEST_UNINIT(doublevec24, double __attribute__((vector_size(24)))); // CHECK: %uninit = alloca <3 x double>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_doublevec24_uninit() -// PATTERN: store <3 x double> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <3 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_doublevec24_uninit() // ZERO: store <3 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1683,7 +1683,7 @@ TEST_UNINIT(longdoublevec32, long double __attribute__((vector_size(sizeof(long // CHECK: %uninit = alloca <2 x x86_fp80>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_longdoublevec32_uninit() -// PATTERN: store <2 x x86_fp80> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <2 x x86_fp80> splat (x86_fp80 0xKFFFFFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_longdoublevec32_uninit() // ZERO: store <2 x x86_fp80> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp index a098f5a9057c0e0..97b5d6ce16b8806 100644 --- a/clang/test/CodeGenCXX/ext-int.cpp +++ b/clang/test/CodeGenCXX/ext-int.cpp @@ -450,38 +450,38 @@ void ShiftBitIntByConstant(uint16_t4 Ext) { // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv4_DU16_(i64 % // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_UBitInt@$0BA@@__clang@@$03@__clang@@@Z"(<4 x i16> Ext << 7; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 7) Ext >> 7; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 7) Ext << -7; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 -7) Ext >> -7; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 -7) // UB in C/C++, Defined in OpenCL. Ext << 29; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 29) Ext >> 29; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 29) } void ShiftBitIntByConstant(vint32_t8 Ext) { // LIN64: define{{.*}} void @_Z21ShiftBitIntByConstantDv8_DB32_(ptr byval(<8 x i32>) align 32 % // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv8_DB32_(<8 x i32> % // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_BitInt@$0CA@@__clang@@$07@__clang@@@Z"(<8 x i32> Ext << 7; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 7) Ext >> 7; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 7) Ext << -7; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 -7) Ext >> -7; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 -7) // UB in C/C++, Defined in OpenCL. Ext << 29; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 29) Ext >> 29; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 29) } void ConstantShiftByBitInt(_BitInt(28) Ext, _BitInt(65) LargeExt) { diff --git a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp index 139feab30cb2ff4..4504000856f23aa 100644 --- a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp +++ b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp @@ -41,7 +41,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x i32> // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32> - // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <2 x i32> [[LHS]], [[SEXT]] // CHECK: = or <2 x i32> [[RHS_AND]], [[LHS_AND]] @@ -52,7 +52,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x float> // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32> - // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x float> [[RHS]] to <2 x i32> // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x float> [[LHS]] to <2 x i32> // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS_EXT]], [[XOR]] @@ -66,7 +66,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x double> // CHECK: [[NEG:%.+]] = icmp slt <2 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i64> - // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x double> [[RHS]] to <2 x i64> // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x double> [[LHS]] to <2 x i64> // CHECK: [[RHS_AND:%.+]] = and <2 x i64> [[RHS_EXT]], [[XOR]] @@ -87,7 +87,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16> - // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]] @@ -102,7 +102,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16> - // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]] @@ -119,7 +119,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -135,7 +135,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS_SPLAT]] to <4 x i32> // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]] @@ -153,7 +153,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x double> [[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x double> [[RHS_SPLAT]] to <4 x i64> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS_SPLAT]] to <4 x i64> // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_CAST]], [[XOR]] @@ -171,7 +171,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -187,7 +187,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -197,8 +197,8 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x i32> // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], - // CHECK: [[RHS_AND:%.+]] = and <4 x i32> , [[XOR]] + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) + // CHECK: [[RHS_AND:%.+]] = and <4 x i32> splat (i32 5), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -210,7 +210,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS]] to <4 x i32> // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]] @@ -222,9 +222,9 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x double> // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS]] to <4 x i64> - // CHECK: [[RHS_AND:%.+]] = and <4 x i64> , [[XOR]] + // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 4618441417868443648), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS_CAST]], [[SEXT]] // CHECK: = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -233,8 +233,8 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x i64> // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], - // CHECK: [[RHS_AND:%.+]] = and <4 x i64> , [[XOR]] + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) + // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 6), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -247,7 +247,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -260,7 +260,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] diff --git a/clang/test/CodeGenCXX/matrix-type-builtins.cpp b/clang/test/CodeGenCXX/matrix-type-builtins.cpp index 164a220de26d4dd..a7fde8719c128a4 100644 --- a/clang/test/CodeGenCXX/matrix-type-builtins.cpp +++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp @@ -57,7 +57,7 @@ void test_transpose_rvalue() { // CHECK-NEXT: entry: // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [9 x float], align 4 // CHECK-NEXT: [[CALL_RES:%.*]] = call noundef <9 x float> @_Z10get_matrixv() - // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], + // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], splat (float 2.000000e+00) // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3) // CHECK-NEXT: store <9 x float> [[M_T]], ptr [[M_T_ADDR]], align 4 matrix_t m_t = __builtin_matrix_transpose(get_matrix() + 2.0); diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp index 8974d2b2600292c..8854a718fb13ecb 100644 --- a/clang/test/CodeGenCXX/matrix-type-operators.cpp +++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp @@ -407,7 +407,7 @@ void test_constexpr2(matrix_type &m) { // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]] - // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], + // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1) // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}} // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4 diff --git a/clang/test/CodeGenCXX/vector-size-conditional.cpp b/clang/test/CodeGenCXX/vector-size-conditional.cpp index fcfaa76d8b8c457..033847cbb083adc 100644 --- a/clang/test/CodeGenCXX/vector-size-conditional.cpp +++ b/clang/test/CodeGenCXX/vector-size-conditional.cpp @@ -146,7 +146,7 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x i32> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5) four_ints ?: some_float; // CHECK: %[[COND:.+]] = load <4 x i32> @@ -161,7 +161,7 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x i32> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5) four_ints ? some_float : four_ints; // CHECK: %[[COND:.+]] = load <4 x i32> @@ -186,19 +186,19 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x double> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> splat (double 6.{{.+}}) four_ll ? four_ll : 6.0; // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6) four_ll ? four_ll : 6; // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6) four_ll ? four_ll : some_int; // CHECK: %[[COND:.+]] = load <4 x i64> diff --git a/clang/test/CodeGenCXX/vector-splat-conversion.cpp b/clang/test/CodeGenCXX/vector-splat-conversion.cpp index 08b2fa8e96cb442..95c744043c0d8e3 100644 --- a/clang/test/CodeGenCXX/vector-splat-conversion.cpp +++ b/clang/test/CodeGenCXX/vector-splat-conversion.cpp @@ -22,28 +22,28 @@ typedef __attribute__((__ext_vector_type__(4))) __int128 bigint4; // CHECK-LABEL: define{{.*}} void @_Z14BoolConversionv void BoolConversion() { - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) int4 intsT = (int4)true; // CHECK: store <4 x i32> zeroinitializer int4 intsF = (int4)false; - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float -1.000000e+00) float4 floatsT = (float4)true; // CHECK: store <4 x float> zeroinitializer float4 floatsF = (float4)false; - // CHECK: store <4 x i128> + // CHECK: store <4 x i128> splat (i128 -1) bigint4 bigintsT = (bigint4)true; // CHECK: store <4 x i128> zeroinitializer bigint4 bigintsF = (bigint4)false; - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) constexpr int4 cIntsT = (int4)true; // CHECK: store <4 x i32> zeroinitializer constexpr int4 cIntsF = (int4)false; - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float -1.000000e+00) constexpr float4 cFloatsT = (float4)true; // CHECK: store <4 x float> zeroinitializer constexpr float4 cFloatsF = (float4)false; - // CHECK: store <4 x i128> + // CHECK: store <4 x i128> splat (i128 -1) constexpr bigint4 cBigintsT = (bigint4)true; // CHECK: store <4 x i128> zeroinitializer constexpr bigint4 cBigintsF = (bigint4)false; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index 6478ea67e32a0df..dd7dfd176970370 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -3,7 +3,7 @@ // CHECK-LABEL: f3_to_d4 // CHECK: [[f3:%.*]] = alloca <3 x float> // CHECK: [[d4:%.*]] = alloca <4 x double> -// CHECK: store <3 x float> , ptr [[f3]] +// CHECK: store <3 x float> splat (float 1.000000e+00), ptr [[f3]] // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]] // CHECK: [[vecf4:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <4 x i32> // CHECK: [[vecd4:%.*]] = fpext <4 x float> [[vecf4]] to <4 x double> @@ -16,7 +16,7 @@ void f3_to_d4() { // CHECK-LABEL: f3_to_f2 // CHECK: [[f3:%.*]] = alloca <3 x float> // CHECK: [[f2:%.*]] = alloca <2 x float> -// CHECK: store <3 x float> , ptr [[f3]] +// CHECK: store <3 x float> splat (float 2.000000e+00), ptr [[f3]] // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]] // CHECK: [[vecf2:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <2 x i32> // CHECK: store <2 x float> [[vecf2]], ptr [[f2]] @@ -28,7 +28,7 @@ void f3_to_f2() { // CHECK-LABEL: d4_to_f2 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[f2:%.*]] = alloca <2 x float> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 3.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecf4:%.*]] = fptrunc <4 x double> [[vecd4]] to <4 x float> // CHECK: [[vecf2:%.*]] = shufflevector <4 x float> [[vecf4]], <4 x float> poison, <2 x i32> @@ -41,7 +41,7 @@ void d4_to_f2() { // CHECK-LABEL: f2_to_i2 // CHECK: [[f2:%.*]] = alloca <2 x float> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <2 x float> , ptr [[f2]] +// CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]] // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]] // CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32> // CHECK: store <2 x i32> [[veci2]], ptr [[i2]] @@ -53,7 +53,7 @@ void f2_to_i2() { // CHECK-LABEL: d4_to_i2 // CHECK: [[f4:%.*]] = alloca <4 x double> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32> // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> @@ -66,7 +66,7 @@ void d4_to_i2() { // CHECK-LABEL: d4_to_l4 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[l4:%.*]] = alloca <4 x i64> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64> // CHECK: store <4 x i64> [[vecl4]], ptr [[l4]] @@ -79,7 +79,7 @@ void d4_to_l4() { // CHECK-LABEL: l4_to_i2 // CHECK: [[l4:%.*]] = alloca <4 x i64> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <4 x i64> , ptr [[l4]] +// CHECK: store <4 x i64> splat (i64 7), ptr [[l4]] // CHECK: [[vecl4:%.*]] = load <4 x i64>, ptr [[l4]] // CHECK: [[veci4:%.*]] = trunc <4 x i64> [[vecl4]] to <4 x i32> // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> @@ -92,7 +92,7 @@ void l4_to_i2() { // CHECK-LABEL: i2_to_b2 // CHECK: [[l2:%.*]] = alloca <2 x i32> // CHECK: [[b2:%.*]] = alloca i8 -// CHECK: store <2 x i32> , ptr [[i2]] +// CHECK: store <2 x i32> splat (i32 8), ptr [[i2]] // CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]] // CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer // CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> @@ -106,7 +106,7 @@ void i2_to_b2() { // CHECK-LABEL: d4_to_b2 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[b2:%.*]] = alloca i8 -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecb4:%.*]] = fcmp une <4 x double> [[vecd4]], zeroinitializer // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> @@ -121,7 +121,7 @@ void d4_to_b2() { // CHECK-LABEL: d4_to_d1 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[d1:%.*]] = alloca <1 x double> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecd1:%.*]] = shufflevector <4 x double> [[vecd4]], <4 x double> poison, <1 x i32> zeroinitializer // CHECK: store <1 x double> [[vecd1]], ptr [[d1:%.*]], align 8 @@ -133,7 +133,7 @@ void d4_to_d1() { // CHECK-LABEL: d4_to_dScalar // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[d:%.*]] = alloca double -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[d4x:%.*]] = extractelement <4 x double> [[vecd4]], i32 0 // CHECK: store double [[d4x]], ptr [[d]] diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl index 6395ddc2fee2a25..94a95107eea69ce 100644 --- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl @@ -20,7 +20,7 @@ float4 ToFourFloats(float V){ // CHECK-LABEL: FillOne // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4 -// CHECK: store <1 x i32> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec2:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <2 x i32> zeroinitializer // CHECK: ret <2 x i32> [[vec2]] @@ -30,7 +30,7 @@ int2 FillOne(){ // CHECK-LABEL: FillOneUnsigned // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4 -// CHECK: store <1 x i32> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec3:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <3 x i32> zeroinitializer // CHECK: ret <3 x i32> [[vec3]] @@ -40,7 +40,7 @@ uint3 FillOneUnsigned(){ // CHECK-LABEL: FillOneUnsignedLong // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i64>, align 8 -// CHECK: store <1 x i64> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x i64> splat (i64 1), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x i64>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec4:%.*]] = shufflevector <1 x i64> [[vec1]], <1 x i64> poison, <4 x i32> zeroinitializer // CHECK: ret <4 x i64> [[vec4]] @@ -50,7 +50,7 @@ vector FillOneUnsignedLong(){ // CHECK-LABEL: FillTwoPointFive // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x double> splat (double 2.500000e+00), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec2:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <2 x i32> zeroinitializer // CHECK: ret <2 x double> [[vec2]] @@ -60,7 +60,7 @@ double2 FillTwoPointFive(){ // CHECK-LABEL: FillOneHalf // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x double> splat (double 5.000000e-01), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer // CHECK: ret <3 x double> [[vec3]] @@ -70,7 +70,7 @@ double3 FillOneHalf(){ // CHECK-LABEL: FillTwoPointFiveFloat // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 2.500000e+00), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <4 x i32> zeroinitializer // CHECK: ret <4 x float> [[vec4]] @@ -83,7 +83,7 @@ float4 FillTwoPointFiveFloat(){ // CHECK-LABEL: FillOneHalfFloat // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 5.000000e-01), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[el0:%.*]] = extractelement <1 x float> [[vec1]], i32 0 // CHECK: [[vec1Splat:%.*]] = insertelement <1 x float> poison, float [[el0]], i64 0 @@ -116,7 +116,7 @@ float2 HowManyFloats(float V) { // CHECK-LABEL: AllRighty // CHECK: [[Tmp:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[Tmp]], align 8 +// CHECK: store <1 x double> splat (double 1.000000e+00), ptr [[Tmp]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[Tmp]], align 8 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer // CHECK: [[vec3f:%.*]] = fptrunc <3 x double> [[vec3]] to <3 x float> @@ -128,7 +128,7 @@ float3 AllRighty() { // CHECK-LABEL: AllRighty2 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 1.000000e+00), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec3:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <3 x i32> // CHECK: ret <3 x float> [[vec3]] diff --git a/clang/test/CodeGenHLSL/builtins/rcp.hlsl b/clang/test/CodeGenHLSL/builtins/rcp.hlsl index eb89bcc4c7c01e7..83fe33406c7c895 100644 --- a/clang/test/CodeGenHLSL/builtins/rcp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/rcp.hlsl @@ -25,31 +25,31 @@ half test_rcp_half(half p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <2 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <2 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <2 x float> @ // SPIR_NO_HALF: define spir_func noundef <2 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <2 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <2 x float> %hlsl.rcp half2 test_rcp_half2(half2 p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <3 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <3 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <3 x float> @ // SPIR_NO_HALF: define spir_func noundef <3 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <3 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <3 x float> %hlsl.rcp half3 test_rcp_half3(half3 p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <4 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <4 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <4 x float> @ // SPIR_NO_HALF: define spir_func noundef <4 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <4 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <4 x float> %hlsl.rcp half4 test_rcp_half4(half4 p0) { return rcp(p0); } @@ -61,19 +61,19 @@ float test_rcp_float(float p0) { return rcp(p0); } // DXIL_CHECK: define noundef <2 x float> @ // SPIR_CHECK: define spir_func noundef <2 x float> @ -// CHECK: %hlsl.rcp = fdiv <2 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <2 x float> %hlsl.rcp float2 test_rcp_float2(float2 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <3 x float> @ // SPIR_CHECK: define spir_func noundef <3 x float> @ -// CHECK: %hlsl.rcp = fdiv <3 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <3 x float> %hlsl.rcp float3 test_rcp_float3(float3 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <4 x float> @ // SPIR_CHECK: define spir_func noundef <4 x float> @ -// CHECK: %hlsl.rcp = fdiv <4 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <4 x float> %hlsl.rcp float4 test_rcp_float4(float4 p0) { return rcp(p0); } @@ -85,18 +85,18 @@ double test_rcp_double(double p0) { return rcp(p0); } // DXIL_CHECK: define noundef <2 x double> @ // SPIR_CHECK: define spir_func noundef <2 x double> @ -// CHECK: %hlsl.rcp = fdiv <2 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <2 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <2 x double> %hlsl.rcp double2 test_rcp_double2(double2 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <3 x double> @ // SPIR_CHECK: define spir_func noundef <3 x double> @ -// CHECK: %hlsl.rcp = fdiv <3 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <3 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <3 x double> %hlsl.rcp double3 test_rcp_double3(double3 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <4 x double> @ // SPIR_CHECK: define spir_func noundef <4 x double> @ -// CHECK: %hlsl.rcp = fdiv <4 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <4 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <4 x double> %hlsl.rcp double4 test_rcp_double4(double4 p0) { return rcp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/sign.hlsl b/clang/test/CodeGenHLSL/builtins/sign.hlsl index 1cdefa815b103f7..8cc910933f462b8 100644 --- a/clang/test/CodeGenHLSL/builtins/sign.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sign.hlsl @@ -121,17 +121,17 @@ int test_sign_uint16_t(uint16_t p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <2 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint16_t2(uint16_t2 p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <3 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint16_t3(uint16_t3 p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <4 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint16_t4(uint16_t4 p0) { return sign(p0); } #endif // __HLSL_ENABLE_16_BIT @@ -164,17 +164,17 @@ int test_sign_uint(uint p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <2 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint2(uint2 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <3 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <3 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint3(uint3 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <4 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <4 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint4(uint4 p0) { return sign(p0); } @@ -206,15 +206,15 @@ int test_sign_uint64_t(uint64_t p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <2 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <2 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint64_t2(uint64_t2 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <3 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <3 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint64_t3(uint64_t3 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <4 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <4 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint64_t4(uint64_t4 p0) { return sign(p0); } diff --git a/clang/test/CodeGenOpenCL/bool_cast.cl b/clang/test/CodeGenOpenCL/bool_cast.cl index 8ba8c8a0176234f..3cb2c0c09734c4d 100644 --- a/clang/test/CodeGenOpenCL/bool_cast.cl +++ b/clang/test/CodeGenOpenCL/bool_cast.cl @@ -22,12 +22,12 @@ void kernel ker() { uchar4 vc; vc = (uchar4)true; -// CHECK: store <4 x i8> , ptr %vc, align 4 +// CHECK: store <4 x i8> splat (i8 -1), ptr %vc, align 4 unsigned char c; c = (unsigned char)true; // CHECK: store i8 1, ptr %c, align 1 float4 vf; vf = (float4)true; -// CHECK: store <4 x float> +// CHECK: store <4 x float> splat (float -1.000000e+00) } diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl index f083a8580ee7806..f3c8bfff6399d07 100644 --- a/clang/test/CodeGenOpenCL/logical-ops.cl +++ b/clang/test/CodeGenOpenCL/logical-ops.cl @@ -10,23 +10,23 @@ typedef double double4 __attribute((ext_vector_type(4))); // CHECK: floatops kernel void floatops(global int4 *out, global float4 *fout) { - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[0] = (float4)(1, 1, 1, 1) && 1.0f; // CHECK: store <4 x i32> zeroinitializer out[1] = (float4)(0, 0, 0, 0) && (float4)(0, 0, 0, 0); - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[2] = (float4)(0, 0, 0, 0) || (float4)(1, 1, 1, 1); // CHECK: store <4 x i32> zeroinitializer out[3] = (float4)(0, 0, 0, 0) || 0.0f; - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[4] = !(float4)(0, 0, 0, 0); // CHECK: store <4 x i32> zeroinitializer out[5] = !(float4)(1, 2, 3, 4); // CHECK: store <4 x i32> out[6] = !(float4)(0, 1, 0, 1); - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float 1.000000e+00) fout[0] = (float4)(!0.0f); // CHECK: store <4 x float> zeroinitializer fout[1] = (float4)(!1.0f); @@ -34,23 +34,23 @@ kernel void floatops(global int4 *out, global float4 *fout) { // CHECK: doubleops kernel void doubleops(global long4 *out, global double4 *dout) { - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[0] = (double4)(1, 1, 1, 1) && 1.0; // CHECK: store <4 x i64> zeroinitializer out[1] = (double4)(0, 0, 0, 0) && (double4)(0, 0, 0, 0); - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[2] = (double4)(0, 0, 0, 0) || (double4)(1, 1, 1, 1); // CHECK: store <4 x i64> zeroinitializer out[3] = (double4)(0, 0, 0, 0) || 0.0f; - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[4] = !(double4)(0, 0, 0, 0); // CHECK: store <4 x i64> zeroinitializer out[5] = !(double4)(1, 2, 3, 4); // CHECK: store <4 x i64> out[6] = !(double4)(0, 1, 0, 1); - // CHECK: store <4 x double> + // CHECK: store <4 x double> splat (double 1.000000e+00) dout[0] = (double4)(!0.0f); // CHECK: store <4 x double> zeroinitializer dout[1] = (double4)(!1.0f); diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl index 5cc4e2b246003a1..d5ef01d55240c54 100644 --- a/clang/test/CodeGenOpenCL/partial_initializer.cl +++ b/clang/test/CodeGenOpenCL/partial_initializer.cl @@ -21,7 +21,7 @@ StrucTy GS = {1, 2}; // CHECK: @GV1 ={{.*}} addrspace(1) global <4 x i32> , align 16 int4 GV1 = (int4)((int2)(1,2),3,4); -// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> , align 16 +// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> splat (i32 1), align 16 int4 GV2 = (int4)(1); // CHECK: @__const.f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4 @@ -57,7 +57,7 @@ void f(void) { // CHECK: store <4 x i32> %[[v7]], ptr %[[V1]], align 16 int4 V1 = (int4)((int2)(1,2),3,4); - // CHECK: store <4 x i32> , ptr %[[V2]], align 16 + // CHECK: store <4 x i32> splat (i32 1), ptr %[[V2]], align 16 int4 V2 = (int4)(1); } diff --git a/clang/test/CodeGenOpenCL/shifts.cl b/clang/test/CodeGenOpenCL/shifts.cl index 98a464b74ce58c3..d08aa232cd3f8ce 100644 --- a/clang/test/CodeGenOpenCL/shifts.cl +++ b/clang/test/CodeGenOpenCL/shifts.cl @@ -47,7 +47,7 @@ typedef __attribute__((ext_vector_type(4))) int int4; //OPT: @vectorVectorTest int4 vectorVectorTest(int4 a,int4 b) { - //OPT: [[VM:%.+]] = and <4 x i32> %b, + //OPT: [[VM:%.+]] = and <4 x i32> %b, splat (i32 31) //OPT-NEXT: [[VC:%.+]] = shl <4 x i32> %a, [[VM]] int4 c = a << b; //OPT-NEXT: [[VF:%.+]] = add <4 x i32> [[VC]], @@ -62,10 +62,10 @@ int4 vectorVectorTest(int4 a,int4 b) { int4 vectorScalarTest(int4 a,int b) { //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> poison //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> poison, <4 x i32> zeroinitializer - //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], + //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], splat (i32 31) //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]] int4 c = a << b; - //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], + //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], splat (i32 2) //NOOPT: [[VSA:%.+]] = add <4 x i32> [[VSC2:%.+]], [[VSF]] int4 d = {1, 1, 1, 1}; int4 f = c + (d << 34); diff --git a/clang/test/CodeGenOpenCL/vector_literals.cl b/clang/test/CodeGenOpenCL/vector_literals.cl index f69f339ca99aaf4..3bbb9e566dbcab3 100644 --- a/clang/test/CodeGenOpenCL/vector_literals.cl +++ b/clang/test/CodeGenOpenCL/vector_literals.cl @@ -48,7 +48,7 @@ void vector_literals_valid() { //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> int4 a_1_3 = (int4)(1, (int3)(2, 3, 4)); - //CHECK: store <4 x i32> , ptr %a + //CHECK: store <4 x i32> splat (i32 1), ptr %a int4 a = (int4)(1); //CHECK: load <4 x i32>, ptr %a @@ -60,7 +60,7 @@ void vector_literals_valid() { //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> int8 b = (int8)(1, 2, a.xy, a); - //CHECK: store <4 x float> , ptr %V2 + //CHECK: store <4 x float> splat (float 1.000000e+00), ptr %V2 float4 V2 = (float4)(1); } diff --git a/clang/test/Headers/__clang_hip_math_deprecated.hip b/clang/test/Headers/__clang_hip_math_deprecated.hip index 17b90eda20572d9..caba3e9ad83d187 100644 --- a/clang/test/Headers/__clang_hip_math_deprecated.hip +++ b/clang/test/Headers/__clang_hip_math_deprecated.hip @@ -21,7 +21,7 @@ extern "C" __device__ _Float16 test_rcpf16_wrapper(_Float16 x) { // CHECK-LABEL: @test_rcp2f16_wrapper( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DIV_I:%.*]] = fdiv contract <2 x half> , [[X:%.*]] +// CHECK-NEXT: [[DIV_I:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[X:%.*]] // CHECK-NEXT: ret <2 x half> [[DIV_I]] // extern "C" __device__ __2f16 test_rcp2f16_wrapper(__2f16 x) { diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 8da1d97fe132809..d27756259fa2f64 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -482,7 +482,7 @@ v128_t test_f64x2_const(void) { // CHECK-LABEL: @test_i8x16_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t test_i8x16_const_splat(void) { return wasm_i8x16_const_splat(42); @@ -490,7 +490,7 @@ v128_t test_i8x16_const_splat(void) { // CHECK-LABEL: @test_u8x16_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t test_u8x16_const_splat(void) { return wasm_u8x16_const_splat(42); @@ -498,7 +498,7 @@ v128_t test_u8x16_const_splat(void) { // CHECK-LABEL: @test_i16x8_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_i16x8_const_splat(void) { return wasm_i16x8_const_splat(42); @@ -506,7 +506,7 @@ v128_t test_i16x8_const_splat(void) { // CHECK-LABEL: @test_u16x8_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_u16x8_const_splat(void) { return wasm_u16x8_const_splat(42); @@ -514,7 +514,7 @@ v128_t test_u16x8_const_splat(void) { // CHECK-LABEL: @test_i32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_i32x4_const_splat(void) { return wasm_i32x4_const_splat(42); @@ -522,7 +522,7 @@ v128_t test_i32x4_const_splat(void) { // CHECK-LABEL: @test_u32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_u32x4_const_splat(void) { return wasm_u32x4_const_splat(42); @@ -546,7 +546,7 @@ v128_t test_u64x2_const_splat(void) { // CHECK-LABEL: @test_f32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 1109917696) // v128_t test_f32x4_const_splat(void) { return wasm_f32x4_const_splat(42); @@ -1462,7 +1462,7 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) { // CHECK-LABEL: @test_v128_not( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[NOT_I]] // v128_t test_v128_not(v128_t a) { @@ -1498,7 +1498,7 @@ v128_t test_v128_xor(v128_t a, v128_t b) { // CHECK-LABEL: @test_v128_andnot( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]] // CHECK-NEXT: ret <4 x i32> [[AND_I]] // diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 4bc7d9e68280d53..53f2cda6d36a4f4 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1689,6 +1689,23 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, if (isa(CV) || isa(CV)) { auto *CVVTy = cast(CV->getType()); Type *ETy = CVVTy->getElementType(); + + // Use the same shorthand for splat vector (i.e. "splat(Ty val)") as is + // permitted on IR input to reduce the output changes when enabling + // UseConstant{Int,FP}ForFixedLengthSplat. + // TODO: Remove this block when the UseConstant{Int,FP}ForFixedLengthSplat + // options are removed. + if (auto *SplatVal = CV->getSplatValue()) { + if (isa(SplatVal) || isa(SplatVal)) { + Out << "splat ("; + WriterCtx.TypePrinter->print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, SplatVal, WriterCtx); + Out << ')'; + return; + } + } + Out << '<'; WriterCtx.TypePrinter->print(ETy, Out); Out << ' '; diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index aaffd97b92b2de9..b329a5607acb975 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -70,15 +70,15 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; CHECK-LABEL: 'fneg_idiom' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fsub half -0.0, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll index 52f6f73525a3b9f..303bcfa289577ce 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll @@ -2091,11 +2091,11 @@ define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 define void @extmul_const(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64) { ; CHECK-LABEL: 'extmul_const' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll index 2ceaf0c6f536af4..ada0be66c27b5a7 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div.ll @@ -192,21 +192,21 @@ define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, 7 @@ -238,21 +238,21 @@ define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, 7 @@ -376,21 +376,21 @@ define i32 @sdiv_uniformconstpow2() { ; CHECK-LABEL: 'sdiv_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, 16 @@ -422,21 +422,21 @@ define i32 @udiv_uniformconstpow2() { ; CHECK-LABEL: 'udiv_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, 16 @@ -560,21 +560,21 @@ define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, -16 @@ -606,21 +606,21 @@ define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll index dfed023f0119d4a..dd93aed53c0f2a0 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <16 x i8> @sdiv8xi16(<16 x i8> %x) { ; CHECK-LABEL: 'sdiv8xi16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = sdiv <16 x i8> %x, @@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) { define <8 x i16> @sdiv16xi8(<8 x i16> %x) { ; CHECK-LABEL: 'sdiv16xi8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = sdiv <8 x i16> %x, @@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) { define <4 x i32> @sdiv32xi4(<4 x i32> %x) { ; CHECK-LABEL: 'sdiv32xi4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = sdiv <4 x i32> %x, @@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) { define <16 x i8> @udiv8xi16(<16 x i8> %x) { ; CHECK-LABEL: 'udiv8xi16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, splat (i8 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = udiv <16 x i8> %x, @@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) { define <8 x i16> @udiv16xi8(<8 x i16> %x) { ; CHECK-LABEL: 'udiv16xi8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, splat (i16 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = udiv <8 x i16> %x, @@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) { define <4 x i32> @udiv32xi4(<4 x i32> %x) { ; CHECK-LABEL: 'udiv32xi4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, splat (i32 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = udiv <4 x i32> %x, diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index eab4efe0613b5dd..632f26dfa538298 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -96,7 +96,7 @@ declare i19 @llvm.fshl.i19(i19, i19, i19) define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl ; entry: @@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl ; entry: @@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl ; entry: @@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index 1876f0410c23374..a0a579ae96a9ba1 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -96,7 +96,7 @@ declare i19 @llvm.fshr.i19(i19, i19, i19) define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr ; entry: @@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr ; entry: @@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr ; entry: @@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/logicalop.ll b/llvm/test/Analysis/CostModel/AArch64/logicalop.ll index ab4db969cb281af..5c71cf02741890c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/logicalop.ll +++ b/llvm/test/Analysis/CostModel/AArch64/logicalop.ll @@ -31,14 +31,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll index 6f9f64a26851a1f..de13125f665ecb0 100644 --- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll +++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll @@ -190,19 +190,19 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi8_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-512-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) @@ -235,19 +235,19 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi8_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-512-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) @@ -280,19 +280,19 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1> define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi32_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-512-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) @@ -325,19 +325,19 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, < define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi32_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-512-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll index 7519bf1f7c5c6fa..2f1e8c8bf8dfa44 100644 --- a/llvm/test/Analysis/CostModel/AArch64/rem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll @@ -178,21 +178,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -221,21 +221,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -350,21 +350,21 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; CHECK-LABEL: 'srem_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -393,21 +393,21 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; CHECK-LABEL: 'urem_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -522,21 +522,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -565,21 +565,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/div.ll b/llvm/test/Analysis/CostModel/AMDGPU/div.ll index 406b67b9d459404..459c41a0b3eb5be 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/div.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll @@ -337,59 +337,59 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; FAST-LABEL: 'sdiv_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -418,59 +418,59 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; FAST-LABEL: 'udiv_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -661,59 +661,59 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; FAST-LABEL: 'sdiv_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -742,59 +742,59 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; FAST-LABEL: 'udiv_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -985,59 +985,59 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; FAST-LABEL: 'sdiv_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -1066,59 +1066,59 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; FAST-LABEL: 'udiv_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 911b4319eaa4e7a..ab9d7f9dc859dec 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -314,146 +314,146 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 { define amdgpu_kernel void @rcp_ieee() #0 { ; CIFASTF64-LABEL: 'rcp_ieee' ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ieee' ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ieee' ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ieee' ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ieee' ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ieee' ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ieee' ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ieee' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef @@ -477,146 +477,146 @@ define amdgpu_kernel void @rcp_ieee() #0 { define amdgpu_kernel void @rcp_ftzdaz() #1 { ; CIFASTF64-LABEL: 'rcp_ftzdaz' ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ftzdaz' ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ftzdaz' ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ftzdaz' ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ftzdaz' ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ftzdaz' ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ftzdaz' ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ftzdaz' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll index 9af5dd352d5883b..734e76bd4053f55 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -139,24 +139,24 @@ define void @fneg_f64() { define i32 @fneg_idiom(i32 %arg) { ; CHECK-LABEL: 'fneg_idiom' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SIZE-LABEL: 'fneg_idiom' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll index 1bd73056e5b83dc..32e8ba51d6887e6 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll @@ -32,22 +32,22 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> , <2 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> , <2 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll index e6193791ff53a07..ca5d2fdd938b561 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -215,78 +215,78 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SLOW16-LABEL: 'mul_uniformconstpow2' ; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; FAST16-LABEL: 'mul_uniformconstpow2' ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; FAST16-SIZE-LABEL: 'mul_uniformconstpow2' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -415,78 +415,78 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SLOW16-LABEL: 'mul_uniformconstnegpow2' ; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; FAST16-LABEL: 'mul_uniformconstnegpow2' ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll index 87e35c7627de05c..7e37566db441721 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll @@ -337,59 +337,59 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; FAST-LABEL: 'srem_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -418,59 +418,59 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; FAST-LABEL: 'urem_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -661,59 +661,59 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; FAST-LABEL: 'srem_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -742,59 +742,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; FAST-LABEL: 'urem_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -985,59 +985,59 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; FAST-LABEL: 'srem_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -1066,59 +1066,59 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; FAST-LABEL: 'urem_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index 36c25850323274a..9f0c29c8bb0c650 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -1031,98 +1031,98 @@ define void @vf64() { define void @vi8_2() { ; CHECK-NEON-LABEL: 'vi8_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi8_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi8_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi8_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi8_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i8> undef, @@ -1146,98 +1146,98 @@ define void @vi8_2() { define void @vi16_2() { ; CHECK-NEON-LABEL: 'vi16_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi16_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi16_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i16> undef, @@ -1261,98 +1261,98 @@ define void @vi16_2() { define void @vi32_2() { ; CHECK-NEON-LABEL: 'vi32_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi32_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi32_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi32_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i32> undef, @@ -1376,98 +1376,98 @@ define void @vi32_2() { define void @vi64_2() { ; CHECK-NEON-LABEL: 'vi64_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi64_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi64_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi64_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i64> undef, @@ -1491,48 +1491,48 @@ define void @vi64_2() { define void @vf16_2() { ; CHECK-NEON-LABEL: 'vf16_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf16_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf16_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf16_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x half> undef, @@ -1546,48 +1546,48 @@ define void @vf16_2() { define void @vf32_2() { ; CHECK-NEON-LABEL: 'vf32_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf32_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf32_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf32_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x float> undef, @@ -1601,48 +1601,48 @@ define void @vf32_2() { define void @vf64_2() { ; CHECK-NEON-LABEL: 'vf64_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf64_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf64_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf64_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x double> undef, diff --git a/llvm/test/Analysis/CostModel/ARM/logicalop.ll b/llvm/test/Analysis/CostModel/ARM/logicalop.ll index d70a4fac4d26c73..967426c727ea73d 100644 --- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll +++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll @@ -80,56 +80,56 @@ define void @vecop() { ; CHECK-MVE-RECIP-LABEL: 'vecop' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-RECIP-LABEL: 'vecop' ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-THUMB1-RECIP-LABEL: 'vecop' ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-RECIP-LABEL: 'vecop' ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vecop' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-NEON-SIZE-LABEL: 'vecop' ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB1-SIZE-LABEL: 'vecop' ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-SIZE-LABEL: 'vecop' ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll b/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll index 495423b2c334586..3b0578d8f2d0596 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll @@ -33,7 +33,7 @@ define void @vecop() { ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll index c976f483fdfec29..5afc199e52d43a3 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll @@ -1173,9 +1173,9 @@ define void @add_of_constant() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> , undef @@ -1191,9 +1191,9 @@ define void @add_of_constant() { ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> , undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> , undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> , undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> , undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> , undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> , undef diff --git a/llvm/test/Analysis/CostModel/RISCV/logicalop.ll b/llvm/test/Analysis/CostModel/RISCV/logicalop.ll index 891c53b535dc61e..80f66cfdc3e096b 100644 --- a/llvm/test/Analysis/CostModel/RISCV/logicalop.ll +++ b/llvm/test/Analysis/CostModel/RISCV/logicalop.ll @@ -38,14 +38,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll index 2448056076d8b43..6fc98da0eea97e8 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll @@ -514,9 +514,9 @@ define void @store_of_constant(ptr %p) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 @@ -532,9 +532,9 @@ define void @store_of_constant(ptr %p) { ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> , ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll index 67b99f573aad21c..344a34fa9a63097 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll @@ -24,7 +24,7 @@ define <4 x i8> @phi_v4i8_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ splat (i8 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x ; br i1 %c, label %a, label %b @@ -79,7 +79,7 @@ define <4 x i8> @phi_v4i8_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x ; br i1 %c, label %a, label %b @@ -97,7 +97,7 @@ define <4 x i16> @phi_v4i16_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ splat (i16 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x ; br i1 %c, label %a, label %b @@ -152,7 +152,7 @@ define <4 x i16> @phi_v4i16_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x ; br i1 %c, label %a, label %b @@ -170,7 +170,7 @@ define <4 x i32> @phi_v4i32_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ splat (i32 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -225,7 +225,7 @@ define <4 x i32> @phi_v4i32_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -243,7 +243,7 @@ define <4 x i64> @phi_v4i64_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ splat (i64 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x ; br i1 %c, label %a, label %b @@ -298,7 +298,7 @@ define <4 x i64> @phi_v4i64_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x ; br i1 %c, label %a, label %b @@ -316,7 +316,7 @@ define <4 x half> @phi_v4f16_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x ; br i1 %c, label %a, label %b @@ -353,7 +353,7 @@ define <4 x half> @phi_v4f16_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x ; br i1 %c, label %a, label %b @@ -371,7 +371,7 @@ define <4 x float> @phi_v4f32_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x ; br i1 %c, label %a, label %b @@ -408,7 +408,7 @@ define <4 x float> @phi_v4f32_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x ; br i1 %c, label %a, label %b @@ -426,7 +426,7 @@ define <4 x double> @phi_v4f64_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x ; br i1 %c, label %a, label %b @@ -463,7 +463,7 @@ define <4 x double> @phi_v4f64_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x ; br i1 %c, label %a, label %b diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index ef17d8dc60c1450..6ab8ac64d64e4ae 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -393,7 +393,7 @@ define void @select() { define void @select_of_constants() { ; CHECK-LABEL: 'select_of_constants' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> splat (i64 128), <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> diff --git a/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll b/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll index b43a46c882805fd..ebc8d83b7ac6e2e 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll @@ -85,7 +85,7 @@ define i8 @fun7(i8 %a) { define <2 x i64> @fun8(<2 x i64> %a) { ; COST-LABEL: 'fun8' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, @@ -94,7 +94,7 @@ define <2 x i64> @fun8(<2 x i64> %a) { define <2 x i64> @fun9(<2 x i64> %a) { ; COST-LABEL: 'fun9' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, splat (i64 -4) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, @@ -103,7 +103,7 @@ define <2 x i64> @fun9(<2 x i64> %a) { define <4 x i32> @fun10(<4 x i32> %a) { ; COST-LABEL: 'fun10' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, @@ -112,7 +112,7 @@ define <4 x i32> @fun10(<4 x i32> %a) { define <4 x i32> @fun11(<4 x i32> %a) { ; COST-LABEL: 'fun11' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, @@ -121,7 +121,7 @@ define <4 x i32> @fun11(<4 x i32> %a) { define <2 x i32> @fun12(<2 x i32> %a) { ; COST-LABEL: 'fun12' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = sdiv <2 x i32> %a, @@ -130,7 +130,7 @@ define <2 x i32> @fun12(<2 x i32> %a) { define <8 x i16> @fun13(<8 x i16> %a) { ; COST-LABEL: 'fun13' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = sdiv <8 x i16> %a, @@ -139,7 +139,7 @@ define <8 x i16> @fun13(<8 x i16> %a) { define <8 x i16> @fun14(<8 x i16> %a) { ; COST-LABEL: 'fun14' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, splat (i16 -64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = sdiv <8 x i16> %a, @@ -148,7 +148,7 @@ define <8 x i16> @fun14(<8 x i16> %a) { define <4 x i16> @fun15(<4 x i16> %a) { ; COST-LABEL: 'fun15' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = sdiv <4 x i16> %a, @@ -157,7 +157,7 @@ define <4 x i16> @fun15(<4 x i16> %a) { define <16 x i8> @fun16(<16 x i8> %a) { ; COST-LABEL: 'fun16' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, splat (i8 64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = sdiv <16 x i8> %a, @@ -166,7 +166,7 @@ define <16 x i8> @fun16(<16 x i8> %a) { define <16 x i8> @fun17(<16 x i8> %a) { ; COST-LABEL: 'fun17' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = sdiv <16 x i8> %a, @@ -175,7 +175,7 @@ define <16 x i8> @fun17(<16 x i8> %a) { define <8 x i8> @fun18(<8 x i8> %a) { ; COST-LABEL: 'fun18' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = sdiv <8 x i8> %a, @@ -224,7 +224,7 @@ define i8 @fun22(i8 %a) { define <2 x i64> @fun23(<2 x i64> %a) { ; COST-LABEL: 'fun23' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = udiv <2 x i64> %a, @@ -233,7 +233,7 @@ define <2 x i64> @fun23(<2 x i64> %a) { define <4 x i32> @fun24(<4 x i32> %a) { ; COST-LABEL: 'fun24' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = udiv <4 x i32> %a, @@ -242,7 +242,7 @@ define <4 x i32> @fun24(<4 x i32> %a) { define <2 x i32> @fun25(<2 x i32> %a) { ; COST-LABEL: 'fun25' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = udiv <2 x i32> %a, @@ -251,7 +251,7 @@ define <2 x i32> @fun25(<2 x i32> %a) { define <8 x i16> @fun26(<8 x i16> %a) { ; COST-LABEL: 'fun26' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = udiv <8 x i16> %a, @@ -260,7 +260,7 @@ define <8 x i16> @fun26(<8 x i16> %a) { define <4 x i16> @fun27(<4 x i16> %a) { ; COST-LABEL: 'fun27' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = udiv <4 x i16> %a, @@ -269,7 +269,7 @@ define <4 x i16> @fun27(<4 x i16> %a) { define <16 x i8> @fun28(<16 x i8> %a) { ; COST-LABEL: 'fun28' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = udiv <16 x i8> %a, @@ -278,7 +278,7 @@ define <16 x i8> @fun28(<16 x i8> %a) { define <8 x i8> @fun29(<8 x i8> %a) { ; COST-LABEL: 'fun29' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = udiv <8 x i8> %a, @@ -363,7 +363,7 @@ define i8 @fun37(i8 %a) { define <2 x i64> @fun38(<2 x i64> %a) { ; COST-LABEL: 'fun38' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = srem <2 x i64> %a, @@ -372,7 +372,7 @@ define <2 x i64> @fun38(<2 x i64> %a) { define <2 x i64> @fun39(<2 x i64> %a) { ; COST-LABEL: 'fun39' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, splat (i64 -4) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = srem <2 x i64> %a, @@ -381,7 +381,7 @@ define <2 x i64> @fun39(<2 x i64> %a) { define <4 x i32> @fun40(<4 x i32> %a) { ; COST-LABEL: 'fun40' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, @@ -390,7 +390,7 @@ define <4 x i32> @fun40(<4 x i32> %a) { define <4 x i32> @fun41(<4 x i32> %a) { ; COST-LABEL: 'fun41' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, @@ -399,7 +399,7 @@ define <4 x i32> @fun41(<4 x i32> %a) { define <2 x i32> @fun42(<2 x i32> %a) { ; COST-LABEL: 'fun42' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = srem <2 x i32> %a, @@ -408,7 +408,7 @@ define <2 x i32> @fun42(<2 x i32> %a) { define <8 x i16> @fun43(<8 x i16> %a) { ; COST-LABEL: 'fun43' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = srem <8 x i16> %a, @@ -417,7 +417,7 @@ define <8 x i16> @fun43(<8 x i16> %a) { define <8 x i16> @fun44(<8 x i16> %a) { ; COST-LABEL: 'fun44' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, splat (i16 -64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = srem <8 x i16> %a, @@ -426,7 +426,7 @@ define <8 x i16> @fun44(<8 x i16> %a) { define <4 x i16> @fun45(<4 x i16> %a) { ; COST-LABEL: 'fun45' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = srem <4 x i16> %a, @@ -435,7 +435,7 @@ define <4 x i16> @fun45(<4 x i16> %a) { define <16 x i8> @fun46(<16 x i8> %a) { ; COST-LABEL: 'fun46' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, splat (i8 64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = srem <16 x i8> %a, @@ -444,7 +444,7 @@ define <16 x i8> @fun46(<16 x i8> %a) { define <16 x i8> @fun47(<16 x i8> %a) { ; COST-LABEL: 'fun47' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = srem <16 x i8> %a, @@ -453,7 +453,7 @@ define <16 x i8> @fun47(<16 x i8> %a) { define <8 x i8> @fun48(<8 x i8> %a) { ; COST-LABEL: 'fun48' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = srem <8 x i8> %a, @@ -502,7 +502,7 @@ define i8 @fun52(i8 %a) { define <2 x i64> @fun53(<2 x i64> %a) { ; COST-LABEL: 'fun53' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = urem <2 x i64> %a, @@ -511,7 +511,7 @@ define <2 x i64> @fun53(<2 x i64> %a) { define <4 x i32> @fun54(<4 x i32> %a) { ; COST-LABEL: 'fun54' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = urem <4 x i32> %a, @@ -520,7 +520,7 @@ define <4 x i32> @fun54(<4 x i32> %a) { define <2 x i32> @fun55(<2 x i32> %a) { ; COST-LABEL: 'fun55' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = urem <2 x i32> %a, @@ -529,7 +529,7 @@ define <2 x i32> @fun55(<2 x i32> %a) { define <8 x i16> @fun56(<8 x i16> %a) { ; COST-LABEL: 'fun56' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = urem <8 x i16> %a, @@ -538,7 +538,7 @@ define <8 x i16> @fun56(<8 x i16> %a) { define <4 x i16> @fun57(<4 x i16> %a) { ; COST-LABEL: 'fun57' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = urem <4 x i16> %a, @@ -547,7 +547,7 @@ define <4 x i16> @fun57(<4 x i16> %a) { define <16 x i8> @fun58(<16 x i8> %a) { ; COST-LABEL: 'fun58' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = urem <16 x i8> %a, @@ -556,7 +556,7 @@ define <16 x i8> @fun58(<16 x i8> %a) { define <8 x i8> @fun59(<8 x i8> %a) { ; COST-LABEL: 'fun59' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = urem <8 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll b/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll index 459ed649eb3f3dc..3c3fa0b530dd80d 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll @@ -30,14 +30,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll index b965a726262e68e..f5b434881436ede 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll @@ -130,46 +130,46 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'fneg_idiom' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll index c147bd2eef6e7e1..74fecc6c4a5747b 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'fneg_idiom' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fneg_idiom' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll index d9312ac05601deb..a801a187b46cbdb 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -196,79 +196,79 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'fneg_idiom' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index 90871e3a3831c0d..d26c856f2707620 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'fneg_idiom' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fneg_idiom' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/div-codesize.ll b/llvm/test/Analysis/CostModel/X86/div-codesize.ll index 801b5f4e9ec06e6..88c4a5dc1850376 100644 --- a/llvm/test/Analysis/CostModel/X86/div-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/div-codesize.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -548,97 +548,97 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div-latency.ll b/llvm/test/Analysis/CostModel/X86/div-latency.ll index 6afb9c790e35203..aada70124a9b47d 100644 --- a/llvm/test/Analysis/CostModel/X86/div-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-latency.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -586,116 +586,116 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -724,116 +724,116 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'udiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -948,21 +948,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -991,21 +991,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll index d54dae0a21a066c..efb853e3721b547 100644 --- a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -548,97 +548,97 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll index 4bbd70e87a1a951..8ffd6835004f916 100644 --- a/llvm/test/Analysis/CostModel/X86/div.ll +++ b/llvm/test/Analysis/CostModel/X86/div.ll @@ -376,97 +376,97 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; SSE-LABEL: 'sdiv_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -495,97 +495,97 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; SSE-LABEL: 'udiv_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -928,135 +928,135 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE2-LABEL: 'sdiv_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sdiv_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -1085,97 +1085,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -1480,97 +1480,97 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; SSE-LABEL: 'sdiv_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -1599,97 +1599,97 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; SSE-LABEL: 'udiv_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll index 71927002b599fd2..fccabe39cd218f7 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll index c40394ba9a72834..a82a07d8ce54d9d 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll @@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll index 7b0daf504855056..c5e99b36e87ab10 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll index 127dec0a1a6f788..0ee9fff1229e5cf 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll index 92a20b938142a7e..0e4b635e8635127 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll index 33fadef536bf74e..b71fd6c38fc7f31 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll @@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll index ef831328c480e76..9b848d42a6f2884 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index 3c233b51053ddc5..2b970ba50c086e3 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/logicalop.ll b/llvm/test/Analysis/CostModel/X86/logicalop.ll index d932bcd040eff06..75890d26b443140 100644 --- a/llvm/test/Analysis/CostModel/X86/logicalop.ll +++ b/llvm/test/Analysis/CostModel/X86/logicalop.ll @@ -41,14 +41,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll index 1e5c02afc2b3b4b..47472d57228585b 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 25d12da306aba3a..e18a4d0467caf8e 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 332d90ac4191c46..4ca1f7f419e6a5b 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll index 14dc561edc34d35..4fb2f1a50a6ed27 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll index a030068dfaf5250..6be81602ab15bd5 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll index 4cf1df14387f86a..75585cf0a6e85c9 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul-latency.ll b/llvm/test/Analysis/CostModel/X86/mul-latency.ll index 176c3ed41bfa8e6..9245f2c5740f366 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-latency.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll index 020b62291012cca..2d1bf23e9699c91 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll index 7e51dbf60cf9b31..4602928a981eace 100644 --- a/llvm/test/Analysis/CostModel/X86/mul.ll +++ b/llvm/test/Analysis/CostModel/X86/mul.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll index b47580ba0abfd19..3c172dda6867b68 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -529,154 +529,154 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'srem_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -705,59 +705,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll index 5264a75659d150a..6c62ccb8df71883 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -510,135 +510,135 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll index 817a0c7e070ac6c..e0f0674c71f604e 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -510,135 +510,135 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index 0e799d63741aa82..4138046ff961ed3 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -490,97 +490,97 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; SSE-LABEL: 'srem_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -609,97 +609,97 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; SSE-LABEL: 'urem_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -1004,173 +1004,173 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'srem_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'srem_uniformconstpow2' ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -1199,59 +1199,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -1594,97 +1594,97 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; SSE-LABEL: 'srem_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -1713,97 +1713,97 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; SSE-LABEL: 'urem_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll index 204290bd4e40f2d..39a74cf6509bcd5 100644 --- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -77,12 +77,12 @@ entry: define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_zext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_zext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -242,12 +242,12 @@ entry: define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a) { ; SLM-LABEL: 'slm-costs_16_v4_zext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_16_v4_zext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: diff --git a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll index 0986def7cef40df..495cc74b643c49e 100644 --- a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll @@ -10,7 +10,7 @@ define <4 x i32> @test1(<4 x i32> %a) { ; CHECK-LABEL: 'test1' -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = udiv <4 x i32> %a, @@ -19,19 +19,19 @@ define <4 x i32> @test1(<4 x i32> %a) { define <8 x i32> @test2(<8 x i32> %a) { ; SSE-LABEL: 'test2' -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX1-LABEL: 'test2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX2-LABEL: 'test2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX512-LABEL: 'test2' -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; %div = udiv <8 x i32> %a, @@ -40,7 +40,7 @@ define <8 x i32> @test2(<8 x i32> %a) { define <8 x i16> @test3(<8 x i16> %a) { ; CHECK-LABEL: 'test3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = udiv <8 x i16> %a, @@ -49,19 +49,19 @@ define <8 x i16> @test3(<8 x i16> %a) { define <16 x i16> @test4(<16 x i16> %a) { ; SSE-LABEL: 'test4' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX1-LABEL: 'test4' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX2-LABEL: 'test4' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX512-LABEL: 'test4' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; %div = udiv <16 x i16> %a, @@ -70,7 +70,7 @@ define <16 x i16> @test4(<16 x i16> %a) { define <8 x i16> @test5(<8 x i16> %a) { ; CHECK-LABEL: 'test5' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = sdiv <8 x i16> %a, @@ -79,19 +79,19 @@ define <8 x i16> @test5(<8 x i16> %a) { define <16 x i16> @test6(<16 x i16> %a) { ; SSE-LABEL: 'test6' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX1-LABEL: 'test6' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX2-LABEL: 'test6' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX512-LABEL: 'test6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; %div = sdiv <16 x i16> %a, @@ -100,7 +100,7 @@ define <16 x i16> @test6(<16 x i16> %a) { define <16 x i8> @test7(<16 x i8> %a) { ; CHECK-LABEL: 'test7' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = sdiv <16 x i8> %a, @@ -109,7 +109,7 @@ define <16 x i8> @test7(<16 x i8> %a) { define <4 x i32> @test8(<4 x i32> %a) { ; CHECK-LABEL: 'test8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = sdiv <4 x i32> %a, @@ -118,19 +118,19 @@ define <4 x i32> @test8(<4 x i32> %a) { define <8 x i32> @test9(<8 x i32> %a) { ; SSE-LABEL: 'test9' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX1-LABEL: 'test9' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX2-LABEL: 'test9' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX512-LABEL: 'test9' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; %div = sdiv <8 x i32> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll index 9ff975665f13b6a..195c61921219f08 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll @@ -1391,23 +1391,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1416,31 +1416,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1449,31 +1449,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1482,7 +1482,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1491,31 +1491,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1524,31 +1524,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1557,7 +1557,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1566,39 +1566,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1607,39 +1607,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1648,35 +1648,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1685,35 +1685,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v32i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1722,35 +1722,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v64i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll index 67679f2cb856666..a3e4cf627642084 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index efd378ee0b5a61d..1bdab183e548c5b 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll index ab300779b4341cc..f96d0d4ef43cc21 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll @@ -1469,23 +1469,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1494,31 +1494,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1527,31 +1527,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1560,31 +1560,31 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1593,31 +1593,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,31 +1626,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1659,31 +1659,31 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1692,39 +1692,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1733,39 +1733,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1774,39 +1774,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1815,39 +1815,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1856,39 +1856,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll index 1b51a2e0a1e6e5e..c85f506d6b60ca4 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll @@ -1415,23 +1415,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1440,31 +1440,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1473,31 +1473,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1506,7 +1506,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1515,31 +1515,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1548,31 +1548,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1581,7 +1581,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1590,39 +1590,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1631,39 +1631,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1672,35 +1672,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1709,39 +1709,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1750,39 +1750,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll index 644fcbbfefdf950..dc6feadbbe17ad8 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll @@ -1359,7 +1359,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1368,31 +1368,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1401,31 +1401,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1434,7 +1434,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1443,31 +1443,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1476,31 +1476,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1509,7 +1509,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1518,39 +1518,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1559,39 +1559,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1600,35 +1600,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1637,39 +1637,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1678,39 +1678,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll index 34ea1a644f6cca3..348ffebf220e0af 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 93b844eddf18246..7a7187dc9d64291 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll index f879a09d067e658..b9accec482622d5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll @@ -1473,27 +1473,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1502,27 +1502,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1531,27 +1531,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1560,27 +1560,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1589,27 +1589,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1618,27 +1618,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1647,27 +1647,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1676,35 +1676,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1713,35 +1713,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1750,35 +1750,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1787,35 +1787,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1824,35 +1824,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll index fe472342e21445e..87c0aee4a564de6 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll @@ -1375,7 +1375,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1384,31 +1384,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1417,31 +1417,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1450,7 +1450,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1459,31 +1459,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1492,31 +1492,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1525,7 +1525,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1534,39 +1534,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1575,39 +1575,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1616,35 +1616,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1653,39 +1653,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1694,39 +1694,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll index 1045b827da7cad6..2cf49533438f22f 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll @@ -1333,7 +1333,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1342,31 +1342,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1375,31 +1375,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1408,7 +1408,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1417,31 +1417,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1450,31 +1450,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1483,7 +1483,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1492,39 +1492,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1533,39 +1533,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1574,35 +1574,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1611,39 +1611,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1652,39 +1652,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll index f093d1c8ca358a4..0d4cd2648d80588 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 09521cfa2d23d36..c2cbc32e328ecbb 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll index 3ae71daf50a3063..18fd15031d28aa8 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll @@ -1433,27 +1433,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1462,27 +1462,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1491,27 +1491,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1520,27 +1520,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1549,27 +1549,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1578,27 +1578,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1607,27 +1607,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1636,35 +1636,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1673,35 +1673,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1710,35 +1710,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1747,35 +1747,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1784,35 +1784,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll index 4256a73a7cf7354..ad2d92681ffa9c1 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll @@ -1459,7 +1459,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1468,27 +1468,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1497,27 +1497,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1526,7 +1526,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1535,27 +1535,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1564,27 +1564,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1593,7 +1593,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1602,35 +1602,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1639,35 +1639,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1676,31 +1676,31 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1709,35 +1709,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1746,35 +1746,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll index 050e6f6735969fc..863b08609fc60d1 100644 --- a/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll +++ b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll @@ -3,7 +3,7 @@ ; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y -; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, splat (i32 8) ; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8> define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer @@ -61,8 +61,8 @@ define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) { ; Shifts with splat shift amounts ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_shl(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = shl <2 x i32> %x, @@ -71,8 +71,8 @@ define <2 x i32> @test_shl(<2 x i32> %a) { } ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_ashr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = ashr <2 x i32> %x, @@ -81,8 +81,8 @@ define <2 x i32> @test_ashr(<2 x i32> %a) { } ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_lshr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = lshr <2 x i32> %x, @@ -95,8 +95,8 @@ declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>) ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 4)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer @@ -107,8 +107,8 @@ define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 28)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer diff --git a/llvm/test/Analysis/DemandedBits/vectors.ll b/llvm/test/Analysis/DemandedBits/vectors.ll index 79c0d2792a1a0cb..c7bcd4e7b078aff 100644 --- a/llvm/test/Analysis/DemandedBits/vectors.ll +++ b/llvm/test/Analysis/DemandedBits/vectors.ll @@ -4,7 +4,7 @@ ; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y -; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, splat (i32 8) ; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8> define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer @@ -66,8 +66,8 @@ define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_shl': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_shl(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = shl <2 x i32> %x, @@ -77,8 +77,8 @@ define <2 x i32> @test_shl(<2 x i32> %a) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_ashr': ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_ashr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = ashr <2 x i32> %x, @@ -88,8 +88,8 @@ define <2 x i32> @test_ashr(<2 x i32> %a) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_lshr': ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_lshr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = lshr <2 x i32> %x, @@ -103,8 +103,8 @@ declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>) ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_fshl': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 4)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer @@ -116,8 +116,8 @@ define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_fshr': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 28)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer diff --git a/llvm/test/Analysis/ValueTracking/known-bits.ll b/llvm/test/Analysis/ValueTracking/known-bits.ll index 9d0b153d8ccfc3e..5b71402a96f0df3 100644 --- a/llvm/test/Analysis/ValueTracking/known-bits.ll +++ b/llvm/test/Analysis/ValueTracking/known-bits.ll @@ -3,7 +3,7 @@ define <4 x i1> @vec_reverse_known_bits(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reverse_known_bits( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %x = or <4 x i8> %xx, %rev = call <4 x i8> @llvm.vector.reverse(<4 x i8> %x) diff --git a/llvm/test/Analysis/ValueTracking/known-fpclass.ll b/llvm/test/Analysis/ValueTracking/known-fpclass.ll index 2b8e6298d746ae2..02289fd2641cf1e 100644 --- a/llvm/test/Analysis/ValueTracking/known-fpclass.ll +++ b/llvm/test/Analysis/ValueTracking/known-fpclass.ll @@ -3,7 +3,7 @@ define <4 x i1> @vector_reverse_fpclass(<4 x double> nofpclass(nzero nan) %x) { ; CHECK-LABEL: @vector_reverse_fpclass( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) %op = call <4 x double> @llvm.vector.reverse(<4 x double> %x.abs) diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index db2c4f3a1ed650b..f02ff583e5cbb5a 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -210,7 +210,7 @@ define i1 @shl_nz_bounded_cnt(i32 %cnt, i32 %y) { define <2 x i1> @shl_nz_bounded_cnt_vec_todo_no_common_bit(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @shl_nz_bounded_cnt_vec_todo_no_common_bit( ; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 16) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[VAL]], [[CNT]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -315,7 +315,7 @@ define i1 @lshr_nz_bounded_cnt_fail(i32 %cnt, i32 %y) { define <2 x i1> @ashr_nz_bounded_cnt_vec_fail(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @ashr_nz_bounded_cnt_vec_fail( -; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 24) ; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], ; CHECK-NEXT: [[SHL:%.*]] = ashr <2 x i32> [[VAL]], [[CNT]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer @@ -648,7 +648,7 @@ define <2 x i1> @bitcast_veci8_to_veci16(<4 x i8> %xx, <2 x i16> %ind) { define <3 x i1> @bitcast_veci3_to_veci4_fail_not_multiple(<4 x i3> %xx, <3 x i4> %ind) { ; CHECK-LABEL: @bitcast_veci3_to_veci4_fail_not_multiple( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <4 x i3> [[XX:%.*]], +; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <4 x i3> [[XX:%.*]], splat (i3 1) ; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i3> [[XA]] to <3 x i4> ; CHECK-NEXT: [[Z:%.*]] = or <3 x i4> [[X]], [[IND:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i4> [[Z]], zeroinitializer @@ -663,7 +663,7 @@ define <3 x i1> @bitcast_veci3_to_veci4_fail_not_multiple(<4 x i3> %xx, <3 x i4> define <4 x i1> @bitcast_fail_veci16_to_veci8(<2 x i16> %xx, <4 x i8> %ind) { ; CHECK-LABEL: @bitcast_fail_veci16_to_veci8( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], +; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], splat (i16 1) ; CHECK-NEXT: [[X:%.*]] = bitcast <2 x i16> [[XA]] to <4 x i8> ; CHECK-NEXT: [[Z:%.*]] = or <4 x i8> [[X]], [[IND:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[Z]], zeroinitializer @@ -1294,7 +1294,7 @@ false: define <2 x i1> @range_metadata_vec(ptr %p, <2 x i32> %x) { ; CHECK-LABEL: @range_metadata_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %v = load <2 x i32>, ptr %p, !range !{i32 1, i32 100} %or = or <2 x i32> %v, %x @@ -1377,7 +1377,7 @@ define i1 @neg_range_call(i8 %y) { define <2 x i1> @range_attr_vec(<2 x i8> range(i8 1, 0) %x, <2 x i8> %y) { ; CHECK-LABEL: @range_attr_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %or = or <2 x i8> %y, %x %cmp = icmp ne <2 x i8> %or, zeroinitializer @@ -1401,7 +1401,7 @@ declare range(i8 -1, 1) <2 x i8> @returns_contain_zero_range_helper_vec() define <2 x i1> @range_return_vec(<2 x i8> %y) { ; CHECK-LABEL: @range_return_vec( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @returns_non_zero_range_helper_vec() -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x = call <2 x i8> @returns_non_zero_range_helper_vec() %or = or <2 x i8> %y, %x @@ -1427,7 +1427,7 @@ declare <2 x i8> @returns_i8_helper_vec() define <2 x i1> @range_call_vec(<2 x i8> %y) { ; CHECK-LABEL: @range_call_vec( ; CHECK-NEXT: [[X:%.*]] = call range(i8 1, 0) <2 x i8> @returns_i8_helper_vec() -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x = call range(i8 1, 0) <2 x i8> @returns_i8_helper_vec() %or = or <2 x i8> %y, %x diff --git a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll index fba907ab731b0ba..f272d9009aed130 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll @@ -91,10 +91,10 @@ define <2 x i1> @add_XY_or_bit0_is_one(<2 x i8> %x, <2 x i8> %C) nounwind { define <2 x i1> @sub_XY_and_bit0_is_zero_fail(<2 x i8> %x, <2 x i8> %C) nounwind { ; CHECK-LABEL: @sub_XY_and_bit0_is_zero_fail( -; CHECK-NEXT: [[C1:%.*]] = or <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[C1:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 8) ; CHECK-NEXT: [[Y:%.*]] = sub <2 x i8> [[X:%.*]], [[C1]] ; CHECK-NEXT: [[W:%.*]] = and <2 x i8> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = or <2 x i8> %C, @@ -149,7 +149,7 @@ define <2 x i1> @sub_YX_xor_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind ; CHECK-LABEL: @sub_YX_xor_bit0_is_one_fail( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[X:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X]], [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], splat (i8 -13) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = sub <2 x i8> %C, @@ -191,10 +191,10 @@ define i1 @add_YX_xor_bit0_is_one_fail(i8 %x, i8 %C) nounwind { define <2 x i1> @add_XY_or_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind { ; CHECK-LABEL: @add_XY_or_bit0_is_one_fail( -; CHECK-NEXT: [[C1:%.*]] = add <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[C1:%.*]] = add <2 x i8> [[C:%.*]], splat (i8 1) ; CHECK-NEXT: [[Y:%.*]] = add <2 x i8> [[C1]], [[X:%.*]] ; CHECK-NEXT: [[W:%.*]] = or <2 x i8> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 90) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = add <2 x i8> %C, @@ -219,7 +219,7 @@ define <2 x i32> @add_and_eval_vec(<2 x i32> %x, <2 x i32> %C) { define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @add_xor_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %y = add <2 x i32> %x, %z = xor <2 x i32> %y, %x @@ -229,7 +229,7 @@ define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) { define <2 x i32> @add_or_eval_vec(<2 x i32> %x, <2 x i32> %C) { ; CHECK-LABEL: @add_or_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %y = add <2 x i32> %x, %z = or <2 x i32> %y, %x diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll index 407100dec1201e3..663de281f19ba40 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -16,7 +16,7 @@ define i1 @blsmsk_eq_is_false(i32 %x) { define <2 x i1> @blsmsk_ne_is_true_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_is_true_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> %x1, @@ -27,7 +27,7 @@ define <2 x i1> @blsmsk_ne_is_true_vec(<2 x i32> %x) { define <2 x i1> @blsmsk_ne_is_true_diff_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_is_true_diff_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> %x1, @@ -82,7 +82,7 @@ define i32 @blsmsk_add_eval(i32 %x) { define <2 x i32> @blsmsk_add_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_add_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = add <2 x i32> %x1, @@ -115,7 +115,7 @@ define i32 @blsmsk_or_eval(i32 %x) { define <2 x i32> @blsmsk_or_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_or_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = add <2 x i32> %x1, @@ -174,8 +174,8 @@ define i32 @blsmsk_and_eval2(i32 %x) { define <2 x i32> @blsmsk_and_eval3_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_and_eval3_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34) +; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], splat (i32 63) ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] ; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], ; CHECK-NEXT: ret <2 x i32> [[Z]] @@ -242,9 +242,9 @@ define <2 x i32> @blsmsk_add_eval_assume_vec(<2 x i32> %x) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP0]]) ; CHECK-NEXT: [[CMP1:%.*]] = extractelement <2 x i1> [[CMP]], i64 1 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) -; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %lb = and <2 x i32> %x, @@ -335,10 +335,10 @@ define i1 @blsi_ne_is_true(i32 %x) { define <2 x i1> @blsi_ge_is_false_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_ge_is_false_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -353,7 +353,7 @@ define <2 x i1> @blsi_ge_is_false_diff_vec(<2 x i32> %x) { ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -399,7 +399,7 @@ define i32 @blsi_sub_eval(i32 %x) { define <2 x i32> @blsi_sub_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_sub_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -31) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> , %x1 @@ -421,7 +421,7 @@ define i32 @blsi_or_eval(i32 %x) { define <2 x i32> @blsi_xor_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_xor_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> , %x1 @@ -443,10 +443,10 @@ define i32 @blsi_and_eval(i32 %x) { define <2 x i32> @blsi_and_eval2_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_and_eval2_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -473,7 +473,7 @@ define i32 @blsi_and_eval3(i32 %x) { define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_eq_is_false_assume_vec( -; CHECK-NEXT: [[LB:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[LB:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[LB]], zeroinitializer ; CHECK-NEXT: [[CMP0:%.*]] = extractelement <2 x i1> [[CMP]], i64 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP0]]) @@ -481,7 +481,7 @@ define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) ; CHECK-NEXT: [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 8) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %lb = and <2 x i32> %x, @@ -533,7 +533,7 @@ define <2 x i1> @blsi_cmp_eq_diff_bits_vec(<2 x i32> %x) { ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -577,10 +577,10 @@ define i32 @blsi_and_eval_assume(i32 %x) { define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_no_proof_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[X2:%.*]] = add nsw <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ne <2 x i32> [[X3]], splat (i32 8) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -592,10 +592,10 @@ define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) { define <2 x i32> @blsmsk_add_noeval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_add_noeval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 9) ; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -649,10 +649,10 @@ define i32 @blsmsk_add_no_eval2(i32 %x) { define <2 x i32> @blsmsk_xor_no_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_xor_no_eval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34) ; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -733,10 +733,10 @@ define i32 @blsi_or_no_eval(i32 %x) { define <2 x i32> @blsi_or_no_partial_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_or_no_partial_eval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> , [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, diff --git a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll index e2fe873d715cd6d..0d452829ae75782 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll @@ -125,7 +125,7 @@ define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) { ; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat( ; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %or1 = or <8 x i16> %x, @@ -171,7 +171,7 @@ define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) { ; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat( ; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %or1 = or <16 x i16> %x, @@ -187,7 +187,7 @@ define <4 x i1> @hadd_shuffle_2st_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %and1 = and <4 x i32> %x, @@ -202,7 +202,7 @@ define <4 x i1> @hadd_shuffle_4th_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %and1 = and <4 x i32> %x, @@ -218,10 +218,10 @@ define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], +; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[RET]] ; entry: @@ -237,11 +237,11 @@ define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], +; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3) ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], +; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[RET]] ; entry: diff --git a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll index d4ed849c231f8fb..3ce590c3a51ba0e 100644 --- a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll +++ b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll @@ -32,8 +32,8 @@ define i32 @shl_shl(i32 %A) { define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) { ; CHECK-LABEL: @shl_shl_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], splat (i33 5) +; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], splat (i33 28) ; CHECK-NEXT: ret <2 x i33> [[C]] ; %B = shl <2 x i33> %A, @@ -67,8 +67,8 @@ define i232 @lshr_lshr(i232 %A) { define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) { ; CHECK-LABEL: @lshr_lshr_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 28) +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = lshr <2 x i32> %A, diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll index e86d28ebbc1d26f..5224d75a157d5b0 100644 --- a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll +++ b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll @@ -124,16 +124,16 @@ define void @numsignbits_shl_zext_all_bits_shifted_out(i8 %x) { define void @numsignbits_shl_zext_vector(<2 x i8> %x) { ; CHECK-LABEL: define void @numsignbits_shl_zext_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i8> [[X]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i8> [[X]], splat (i8 5) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[ASHR]] to <2 x i16> -; CHECK-NEXT: [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], -; CHECK-NEXT: [[ADD14:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], splat (i16 10) +; CHECK-NEXT: [[ADD14:%.*]] = and <2 x i16> [[NSB4]], splat (i16 15360) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD14]]) -; CHECK-NEXT: [[ADD13:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[ADD13:%.*]] = and <2 x i16> [[NSB4]], splat (i16 7168) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD13]]) -; CHECK-NEXT: [[ADD12:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[ADD12:%.*]] = and <2 x i16> [[NSB4]], splat (i16 3072) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD12]]) -; CHECK-NEXT: [[AND11:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[AND11:%.*]] = and <2 x i16> [[NSB4]], splat (i16 2048) ; CHECK-NEXT: [[ADD11:%.*]] = add nsw <2 x i16> [[AND11]], [[NSB4]] ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD11]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll index 44a7511ea568b02..944e6f0236ecbfe 100644 --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -41,8 +41,8 @@ ; CHECK: @gep2 = global <2 x ptr> undef ; CHECK: @gep3 = global <2 x ptr> zeroinitializer ; CHECK: @gep4 = global <2 x ptr> zeroinitializer -; CHECK: @bitcast1 = global <2 x i32> -; CHECK: @bitcast2 = global <4 x i16> +; CHECK: @bitcast1 = global <2 x i32> splat (i32 -1) +; CHECK: @bitcast2 = global <4 x i16> splat (i16 -1) ;. define void @dummy() { ; CHECK-LABEL: @dummy( diff --git a/llvm/test/Assembler/constant-splat.ll b/llvm/test/Assembler/constant-splat.ll index f3ec0c8340aa157..1c2831058b88709 100644 --- a/llvm/test/Assembler/constant-splat.ll +++ b/llvm/test/Assembler/constant-splat.ll @@ -6,47 +6,47 @@ @my_global = external global i32 -; CHECK: @constant.splat.i1 = constant <1 x i1> +; CHECK: @constant.splat.i1 = constant <1 x i1> splat (i1 true) @constant.splat.i1 = constant <1 x i1> splat (i1 true) -; CHECK: @constant.splat.i32 = constant <5 x i32> +; CHECK: @constant.splat.i32 = constant <5 x i32> splat (i32 7) @constant.splat.i32 = constant <5 x i32> splat (i32 7) -; CHECK: @constant.splat.i128 = constant <2 x i128> +; CHECK: @constant.splat.i128 = constant <2 x i128> splat (i128 85070591730234615870450834276742070272) @constant.splat.i128 = constant <2 x i128> splat (i128 85070591730234615870450834276742070272) -; CHECK: @constant.splat.f16 = constant <4 x half> +; CHECK: @constant.splat.f16 = constant <4 x half> splat (half 0xHBC00) @constant.splat.f16 = constant <4 x half> splat (half 0xHBC00) -; CHECK: @constant.splat.f32 = constant <5 x float> +; CHECK: @constant.splat.f32 = constant <5 x float> splat (float -2.000000e+00) @constant.splat.f32 = constant <5 x float> splat (float -2.000000e+00) -; CHECK: @constant.splat.f64 = constant <3 x double> +; CHECK: @constant.splat.f64 = constant <3 x double> splat (double -3.000000e+00) @constant.splat.f64 = constant <3 x double> splat (double -3.000000e+00) -; CHECK: @constant.splat.128 = constant <2 x fp128> +; CHECK: @constant.splat.128 = constant <2 x fp128> splat (fp128 0xL00000000000000018000000000000000) @constant.splat.128 = constant <2 x fp128> splat (fp128 0xL00000000000000018000000000000000) -; CHECK: @constant.splat.bf16 = constant <4 x bfloat> +; CHECK: @constant.splat.bf16 = constant <4 x bfloat> splat (bfloat 0xRC0A0) @constant.splat.bf16 = constant <4 x bfloat> splat (bfloat 0xRC0A0) -; CHECK: @constant.splat.x86_fp80 = constant <3 x x86_fp80> +; CHECK: @constant.splat.x86_fp80 = constant <3 x x86_fp80> splat (x86_fp80 0xK4000C8F5C28F5C28F800) @constant.splat.x86_fp80 = constant <3 x x86_fp80> splat (x86_fp80 0xK4000C8F5C28F5C28F800) -; CHECK: @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> +; CHECK: @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> splat (ppc_fp128 0xM80000000000000000000000000000000) @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> splat (ppc_fp128 0xM80000000000000000000000000000000) ; CHECK: @constant.splat.global.ptr = constant <4 x ptr> @constant.splat.global.ptr = constant <4 x ptr> splat (ptr @my_global) define void @add_fixed_lenth_vector_splat_i32(<4 x i32> %a) { -; CHECK: %add = add <4 x i32> %a, +; CHECK: %add = add <4 x i32> %a, splat (i32 137) %add = add <4 x i32> %a, splat (i32 137) ret void } define <4 x i32> @ret_fixed_lenth_vector_splat_i32() { -; CHECK: ret <4 x i32> +; CHECK: ret <4 x i32> splat (i32 56) ret <4 x i32> splat (i32 56) } diff --git a/llvm/test/Assembler/opaque-ptr.ll b/llvm/test/Assembler/opaque-ptr.ll index 236a64904bc9575..287b81d2d1c2f13 100644 --- a/llvm/test/Assembler/opaque-ptr.ll +++ b/llvm/test/Assembler/opaque-ptr.ll @@ -92,7 +92,7 @@ define <2 x ptr> @gep_constexpr_vec1(ptr %a) { } ; CHECK: define <2 x ptr> @gep_constexpr_vec2(<2 x ptr> %a) -; CHECK: ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, <2 x i32> ) +; CHECK: ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, <2 x i32> splat (i32 3)) define <2 x ptr> @gep_constexpr_vec2(<2 x ptr> %a) { ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, i32 3) } diff --git a/llvm/test/Bitcode/constantsTest.3.2.ll b/llvm/test/Bitcode/constantsTest.3.2.ll index f20b1b20c1544be..0b04686bab1d2ee 100644 --- a/llvm/test/Bitcode/constantsTest.3.2.ll +++ b/llvm/test/Bitcode/constantsTest.3.2.ll @@ -47,7 +47,7 @@ entry: %res2 = extractvalue [2 x i32] [i32 1, i32 2], 0 ;const vector -; CHECK-NEXT: %res3 = add <2 x i32> , +; CHECK-NEXT: %res3 = add <2 x i32> splat (i32 1) %res3 = add <2 x i32> , ;zeroinitializer @@ -109,11 +109,11 @@ entry: icmp eq i32 1, 0 ; CHECK-NEXT: fcmp oeq float 1.000000e+00, 0.000000e+00 fcmp oeq float 1.0, 0.0 - ; CHECK-NEXT: extractelement <2 x i32> , i32 1 + ; CHECK-NEXT: extractelement <2 x i32> splat (i32 1) extractelement <2 x i32> , i32 1 - ; CHECK-NEXT: insertelement <2 x i32> , i32 0, i32 1 + ; CHECK-NEXT: insertelement <2 x i32> splat (i32 1), i32 0, i32 1 insertelement <2 x i32> , i32 0, i32 1 - ; CHECK-NEXT: shufflevector <2 x i32> , <2 x i32> zeroinitializer, <4 x i32> + ; CHECK-NEXT: shufflevector <2 x i32> splat (i32 1), <2 x i32> zeroinitializer, <4 x i32> shufflevector <2 x i32> , <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: extractvalue { i32, float } { i32 1, float 2.000000e+00 }, 0 extractvalue { i32, float } { i32 1, float 2.0 }, 0 @@ -121,4 +121,4 @@ entry: insertvalue { i32, float } { i32 1, float 2.0 }, i32 0, 0 ret void -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll index 840ed6a3c40d688..7b4a94a41f7d842 100644 --- a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -517,7 +517,7 @@ declare void @dummy(i64, i64, i64) define void @vectorPromotion() { ; OPTALL-LABEL: define void @vectorPromotion() { ; OPTALL-NEXT: [[ENTRY:.*:]] -; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, +; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8) ; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64> ; OPTALL-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll index b69afa3ab1f3d65..7aaf599583c8017 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -1357,17 +1357,17 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) { ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> , [[X]], !fpmath [[META1:![0-9]+]] +; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]] ; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> , [[X]] +; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath [[META0]] +; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath [[META0]] +; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; @@ -2114,7 +2114,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 @@ -2185,7 +2185,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 @@ -2256,7 +2256,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]] ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll index b8585120afa45f1..e13f0fba2bdb7ee 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -2144,7 +2144,7 @@ define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) { ; VI-LABEL: @bitreverse_3xi15( ; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> ; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]]) -; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], +; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 17) ; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> ; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8 ; VI-NEXT: ret void @@ -2842,7 +2842,7 @@ define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) { ; VI-LABEL: @bitreverse_3xi16( ; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> ; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]]) -; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], +; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 16) ; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> ; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8 ; VI-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index e4756ad3817c2e5..d88719502d88fd7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -5594,7 +5594,7 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, < define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @udiv_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float @@ -5938,7 +5938,7 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3 define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @urem_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float @@ -6393,7 +6393,7 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sdiv_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 @@ -6852,7 +6852,7 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3 define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @srem_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 @@ -7334,7 +7334,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, < define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @udiv_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], [[TMP2]] @@ -7592,7 +7592,7 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(ptr addrspace(1) %out, <2 x i6 define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @urem_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = urem i64 [[TMP1]], [[TMP2]] @@ -8261,7 +8261,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @sdiv_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = sdiv i64 [[TMP1]], [[TMP2]] @@ -9341,7 +9341,7 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(ptr addrspace(1) %out, <2 x i6 define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @srem_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = srem i64 [[TMP1]], [[TMP2]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll index d938c16bf6134c7..296b817bc8f7510 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll @@ -69,13 +69,13 @@ define <2 x i8> @mul_v1i16(<1 x i16> %arg) { ; ; VI-LABEL: @mul_v1i16( ; VI-NEXT: BB: -; VI-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], +; VI-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42) ; VI-NEXT: [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8> ; VI-NEXT: ret <2 x i8> [[CAST]] ; ; DISABLED-LABEL: @mul_v1i16( ; DISABLED-NEXT: BB: -; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], +; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42) ; DISABLED-NEXT: [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8> ; DISABLED-NEXT: ret <2 x i8> [[CAST]] ; @@ -97,12 +97,12 @@ define <1 x i8> @mul_v1i8(<1 x i8> %arg) { ; ; VI-LABEL: @mul_v1i8( ; VI-NEXT: BB: -; VI-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], +; VI-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42) ; VI-NEXT: ret <1 x i8> [[MUL]] ; ; DISABLED-LABEL: @mul_v1i8( ; DISABLED-NEXT: BB: -; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], +; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42) ; DISABLED-NEXT: ret <1 x i8> [[MUL]] ; BB: @@ -112,10 +112,10 @@ BB: define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-LABEL: @smul24_v2i32( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -127,10 +127,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-NEXT: ret <2 x i32> [[MUL]] ; ; VI-LABEL: @smul24_v2i32( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -142,10 +142,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; VI-NEXT: ret <2 x i32> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i32( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i32> [[MUL]] ; @@ -184,8 +184,8 @@ define i32 @umul24_i32(i32 %lhs, i32 %rhs) { define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-LABEL: @umul24_v2i32( -; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -197,8 +197,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-NEXT: ret <2 x i32> [[MUL]] ; ; VI-LABEL: @umul24_v2i32( -; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -210,8 +210,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; VI-NEXT: ret <2 x i32> [[MUL]] ; ; DISABLED-LABEL: @umul24_v2i32( -; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i32> [[MUL]] ; @@ -495,8 +495,8 @@ define i31 @umul24_i31(i31 %lhs, i31 %rhs) { define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-LABEL: @umul24_v2i31( -; SI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; SI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -514,8 +514,8 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-NEXT: ret <2 x i31> [[MUL]] ; ; VI-LABEL: @umul24_v2i31( -; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -533,8 +533,8 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; VI-NEXT: ret <2 x i31> [[MUL]] ; ; DISABLED-LABEL: @umul24_v2i31( -; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i31> [[MUL]] ; @@ -546,10 +546,10 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-LABEL: @smul24_v2i31( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -567,10 +567,10 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-NEXT: ret <2 x i31> [[MUL]] ; ; VI-LABEL: @smul24_v2i31( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -588,10 +588,10 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; VI-NEXT: ret <2 x i31> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i31( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i31> [[MUL]] ; @@ -733,10 +733,10 @@ define i32 @umul25_i32(i32 %lhs, i32 %rhs) { define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; SI-LABEL: @smul24_v2i33( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0 @@ -754,10 +754,10 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; SI-NEXT: ret <2 x i33> [[MUL]] ; ; VI-LABEL: @smul24_v2i33( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0 @@ -775,10 +775,10 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; VI-NEXT: ret <2 x i33> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i33( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i33> [[MUL]] ; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll index a5bea7cd738f1d7..6840c75644b9775 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll @@ -100,7 +100,7 @@ define void @broken_phi() { ; GFX9-NEXT: bb: ; GFX9-NEXT: br label [[BB1:%.*]] ; GFX9: bb1: -; GFX9-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX9-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX9-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX9: bb2: ; GFX9-NEXT: br label [[BB3]] @@ -118,7 +118,7 @@ define void @broken_phi() { ; GFX12-NEXT: bb: ; GFX12-NEXT: br label [[BB1:%.*]] ; GFX12: bb1: -; GFX12-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX12-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX12-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX12: bb2: ; GFX12-NEXT: br label [[BB3]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll index acdab29e85b91af..b494ff8ba1f5dd1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll @@ -541,7 +541,7 @@ define float @test_pow_afn_f32_neg0.0(float %x) { define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -550,7 +550,7 @@ define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -559,7 +559,7 @@ define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -568,7 +568,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) { define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -577,7 +577,7 @@ define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x floa define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -684,7 +684,7 @@ define <2 x float> @test_pow_afn_v2f32_1.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg1.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -713,7 +713,7 @@ define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x floa define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) @@ -753,7 +753,7 @@ define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -793,7 +793,7 @@ define float @test_pow_afn_f32_neg3.0(float %x) { define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -803,7 +803,7 @@ define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -843,7 +843,7 @@ define float @test_pow_afn_f32_neg3.99(float %x) { define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -853,7 +853,7 @@ define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -893,7 +893,7 @@ define float @test_pow_afn_f32_neg8.0(float %x) { define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -903,7 +903,7 @@ define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -8)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -943,7 +943,7 @@ define float @test_pow_afn_f32_neg12.0(float %x) { define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 12)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -953,7 +953,7 @@ define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -12)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -993,7 +993,7 @@ define float @test_pow_afn_f32_neg13.0(float %x) { define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 13)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1003,7 +1003,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -13)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1432,7 +1432,7 @@ define float @test_pow_f32__y_n_2_5(float %x) { define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> zeroinitializer) ret <2 x float> %pow @@ -1441,7 +1441,7 @@ define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -1459,7 +1459,7 @@ define <2 x float> @test_pow_v2f32__y_1(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n1 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1479,7 +1479,7 @@ define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1509,7 +1509,7 @@ define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1519,7 +1519,7 @@ define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1529,7 +1529,7 @@ define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 2.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1539,7 +1539,7 @@ define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n_2_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n_2_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1814,7 +1814,7 @@ define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 4.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -2003,7 +2003,7 @@ define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4(<2 x double> %x) { define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> splat (double 4.500000e+00)) ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> ) @@ -2157,7 +2157,7 @@ define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4(<2 x half> %x) { define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> splat (half 0xH4480)) ; CHECK-NEXT: ret <2 x half> [[POW]] ; %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> ) @@ -2395,7 +2395,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> @@ -2444,7 +2444,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index bd4b86f0387666f..f9c359bc114ed3a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -277,7 +277,7 @@ define <2 x float> @test_pown_v2f32__y_0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; entry: %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> zeroinitializer) @@ -288,7 +288,7 @@ define <2 x float> @test_pown_v2f32__y_0_undef(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0_undef ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; entry: %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> ) @@ -299,7 +299,7 @@ define <3 x float> @test_pown_v3f32__y_0(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_0 ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; entry: %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> zeroinitializer) @@ -310,7 +310,7 @@ define <4 x float> @test_pown_v4f32__y_0(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_0 ; CHECK-SAME: (<4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x float> +; CHECK-NEXT: ret <4 x float> splat (float 1.000000e+00) ; entry: %call = tail call <4 x float> @_Z4pownDv4_fDv4_i(<4 x float> %x, <4 x i32> zeroinitializer) @@ -321,7 +321,7 @@ define <8 x float> @test_pown_v8f32__y_0(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_0 ; CHECK-SAME: (<8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x float> +; CHECK-NEXT: ret <8 x float> splat (float 1.000000e+00) ; entry: %call = tail call <8 x float> @_Z4pownDv8_fDv8_i(<8 x float> %x, <8 x i32> zeroinitializer) @@ -332,7 +332,7 @@ define <16 x float> @test_pown_v16f32__y_0(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_0 ; CHECK-SAME: (<16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x float> +; CHECK-NEXT: ret <16 x float> splat (float 1.000000e+00) ; entry: %call = tail call <16 x float> @_Z4pownDv16_fDv16_i(<16 x float> %x, <16 x i32> zeroinitializer) @@ -527,7 +527,7 @@ define <2 x float> @test_pown_v2f32__y_neg1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_neg1 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; entry: @@ -539,7 +539,7 @@ define <3 x float> @test_pown_v3f32__y_neg1(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1 ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; entry: @@ -551,7 +551,7 @@ define <3 x float> @test_pown_v3f32__y_neg1_undef(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1_undef ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; entry: @@ -563,7 +563,7 @@ define <4 x float> @test_pown_v4f32__y_neg1(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_neg1 ; CHECK-SAME: (<4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <4 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <4 x float> [[__POWRECIP]] ; entry: @@ -575,7 +575,7 @@ define <8 x float> @test_pown_v8f32__y_neg1(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_neg1 ; CHECK-SAME: (<8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <8 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <8 x float> [[__POWRECIP]] ; entry: @@ -587,7 +587,7 @@ define <16 x float> @test_pown_v16f32__y_neg1(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_neg1 ; CHECK-SAME: (<16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <16 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <16 x float> [[__POWRECIP]] ; entry: @@ -698,7 +698,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y) ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> @@ -744,7 +744,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64> -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], splat (i64 63) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64> @@ -790,7 +790,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) { ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16> -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], splat (i16 15) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16> @@ -1001,7 +1001,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] ; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX2]] -; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> , [[__POWPROD]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWPROD]] ; CHECK-NEXT: ret <2 x float> [[__1POWPROD]] ; entry: @@ -1015,7 +1015,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] ; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]] -; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> , [[__POWX21]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWX21]] ; CHECK-NEXT: ret <2 x float> [[__1POWPROD]] ; entry: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll index 1a92ca8960a7763..c2b2c693e742aeb 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll @@ -517,7 +517,7 @@ define float @test_powr_afn_f32_neg0.0(float %x) { define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %powr @@ -526,7 +526,7 @@ define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %powr @@ -545,7 +545,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.0(<2 x float> %x) { define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -554,7 +554,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x flo define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -661,7 +661,7 @@ define <2 x float> @test_powr_afn_v2f32_1.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg1.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg1.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -690,7 +690,7 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x flo define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) @@ -730,7 +730,7 @@ define <2 x float> @test_powr_afn_v2f32_2.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg2.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg2.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -770,7 +770,7 @@ define float @test_powr_afn_f32_neg3.0(float %x) { define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 3.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -780,7 +780,7 @@ define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -3.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -820,7 +820,7 @@ define float @test_powr_afn_f32_neg3.99(float %x) { define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -830,7 +830,7 @@ define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -870,7 +870,7 @@ define float @test_powr_afn_f32_neg8.0(float %x) { define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 8.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -880,7 +880,7 @@ define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -8.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -920,7 +920,7 @@ define float @test_powr_afn_f32_neg12.0(float %x) { define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.200000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -930,7 +930,7 @@ define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.200000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -970,7 +970,7 @@ define float @test_powr_afn_f32_neg13.0(float %x) { define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.300000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -980,7 +980,7 @@ define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.300000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll index 7932f8d1fc5beb8..a2d5ce2d658b576 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll @@ -362,7 +362,7 @@ define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) { define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg1( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x half> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x half> splat (half 0xH3C00), [[X]] ; CHECK-NEXT: ret <2 x half> [[__ROOTN2DIV]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -373,7 +373,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg2( ; CHECK-SAME: <2 x half> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> , [[TMP1]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[TMP1]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x half> [[__ROOTN2RSQRT]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -383,7 +383,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) { define <2 x half> @test_rootn_v2f16_neg3(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg3( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x half> [[CALL]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -536,7 +536,7 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp( ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) #[[ATTR0]] +; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 1)) #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -795,7 +795,7 @@ define <2 x float> @test_rootn_v2f32__y_neg1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg1( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2DIV]] ; entry: @@ -807,7 +807,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1( ; CHECK-SAME: <3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__ROOTN2DIV]] ; entry: @@ -819,7 +819,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1_undef(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1_undef( ; CHECK-SAME: <3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__ROOTN2DIV]] ; entry: @@ -831,7 +831,7 @@ define <4 x float> @test_rootn_v4f32__y_neg1(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_neg1( ; CHECK-SAME: <4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <4 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <4 x float> [[__ROOTN2DIV]] ; entry: @@ -843,7 +843,7 @@ define <8 x float> @test_rootn_v8f32__y_neg1(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_neg1( ; CHECK-SAME: <8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <8 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <8 x float> [[__ROOTN2DIV]] ; entry: @@ -855,7 +855,7 @@ define <16 x float> @test_rootn_v16f32__y_neg1(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_neg1( ; CHECK-SAME: <16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <16 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <16 x float> [[__ROOTN2DIV]] ; entry: @@ -930,7 +930,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2(<2 x float> %x) { ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> , [[TMP0]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -943,7 +943,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__flags(<2 x float> %x) { ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> , [[TMP0]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -955,7 +955,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp( ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) #[[ATTR0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -1277,7 +1277,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 4)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1289,7 +1289,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1301,7 +1301,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -4)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1313,7 +1313,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index b212b9caf8400e8..f6ee007facd7fd0 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -422,7 +422,7 @@ define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) { ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000)) ; GFX6-IR-NEXT: ret <2 x float> [[MIN]] ; ; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans @@ -1244,7 +1244,7 @@ define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) { ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX6-IR-NEXT: ret <2 x half> [[MIN]] ; ; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan @@ -1252,7 +1252,7 @@ define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) { ; GFX7-IR-NEXT: entry: ; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX7-IR-NEXT: ret <2 x half> [[MIN]] ; ; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan @@ -1668,11 +1668,11 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] ; GFX6-IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x float> [[COND6]] @@ -1688,7 +1688,7 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0 ; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1 ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) ; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] ; IR-FRACT-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-FRACT-NEXT: ret <2 x float> [[COND6]] @@ -2021,11 +2021,11 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; GFX6-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x half> [[COND6]] @@ -2035,11 +2035,11 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX7-IR-NEXT: entry: ; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer ; GFX7-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] ; GFX7-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; GFX7-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; GFX7-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX7-IR-NEXT: ret <2 x half> [[COND6]] @@ -2055,7 +2055,7 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0 ; IR-LEGALF16-NEXT: [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1 ; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; IR-LEGALF16-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; IR-LEGALF16-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-LEGALF16-NEXT: ret <2 x half> [[COND6]] @@ -2187,11 +2187,11 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> splat (double 0x3FEFFFFFFFFFFFFF)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] ; GFX6-IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x double> [[COND6]] @@ -2207,7 +2207,7 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0 ; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1 ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) -; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) ; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] ; IR-FRACT-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-FRACT-NEXT: ret <2 x double> [[COND6]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll index 5ff08a5e8227feb..9b2e2f950a39d64 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll @@ -31,13 +31,13 @@ define void @vector_copy(ptr %a, ptr %b) { ; CHECK-LABEL: define void @vector_copy ; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X:%.*]] = load <4 x i160>, ptr [[A]], align 128 -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i160> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i160> [[X]], splat (i160 32) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i160> [[TMP1]] to <4 x i128> ; CHECK-NEXT: [[X_PTR_RSRC:%.*]] = inttoptr <4 x i128> [[TMP2]] to <4 x ptr addrspace(8)> ; CHECK-NEXT: [[X_PTR_OFF:%.*]] = trunc <4 x i160> [[X]] to <4 x i32> ; CHECK-NEXT: [[B1:%.*]] = getelementptr <4 x i160>, ptr [[B]], i64 2 ; CHECK-NEXT: [[X_PTR_INT_RSRC:%.*]] = ptrtoint <4 x ptr addrspace(8)> [[X_PTR_RSRC]] to <4 x i160> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i160> [[X_PTR_INT_RSRC]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i160> [[X_PTR_INT_RSRC]], splat (i160 32) ; CHECK-NEXT: [[X_PTR_INT_OFF:%.*]] = zext <4 x i32> [[X_PTR_OFF]] to <4 x i160> ; CHECK-NEXT: [[X_PTR_INT:%.*]] = or <4 x i160> [[TMP3]], [[X_PTR_INT_OFF]] ; CHECK-NEXT: store <4 x i160> [[X_PTR_INT]], ptr [[B1]], align 128 diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index cc98b5333c5bb84..3191abae34ac60e 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -27,9 +27,9 @@ define <2 x ptr addrspace(7)> @gep_vectors(<2 x ptr addrspace(7)> %in, <2 x i32> ; CHECK-SAME: ({ <2 x ptr addrspace(8)>, <2 x i32> } [[IN:%.*]], <2 x i32> [[IDX:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 0 ; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 1 -; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], -; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], -; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], +; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], splat (i32 40) +; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) +; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 @@ -47,9 +47,9 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLAT_C:%.*]] = trunc <2 x i64> [[DOTSPLAT]] to <2 x i32> -; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], -; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], -; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], +; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], splat (i32 40) +; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) +; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 @@ -153,7 +153,7 @@ define <2 x i160> @ptrtoint_vec(<2 x ptr addrspace(7)> %ptr) { ; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 0 ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 1 ; CHECK-NEXT: [[RET_RSRC:%.*]] = ptrtoint <2 x ptr addrspace(8)> [[PTR_RSRC]] to <2 x i160> -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i160> [[RET_RSRC]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i160> [[RET_RSRC]], splat (i160 32) ; CHECK-NEXT: [[RET_OFF:%.*]] = zext <2 x i32> [[PTR_OFF]] to <2 x i160> ; CHECK-NEXT: [[RET:%.*]] = or <2 x i160> [[TMP1]], [[RET_OFF]] ; CHECK-NEXT: ret <2 x i160> [[RET]] @@ -221,7 +221,7 @@ define ptr addrspace(7) @inttoptr(i160 %v) { define <2 x ptr addrspace(7)> @inttoptr_vec(<2 x i160> %v) { ; CHECK-LABEL: define { <2 x ptr addrspace(8)>, <2 x i32> } @inttoptr_vec ; CHECK-SAME: (<2 x i160> [[V:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i160> [[V]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i160> [[V]], splat (i160 32) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i160> [[TMP1]] to <2 x i128> ; CHECK-NEXT: [[RET_RSRC:%.*]] = inttoptr <2 x i128> [[TMP2]] to <2 x ptr addrspace(8)> ; CHECK-NEXT: [[RET_OFF:%.*]] = trunc <2 x i160> [[V]] to <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll index bd9234d864b3d8f..05c727201bbf1d6 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -63,7 +63,7 @@ define amdgpu_vs void @promote_store_aggr() #0 { ; CHECK-NEXT: [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1 ; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1 ; CHECK-NEXT: store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4 -; CHECK-NEXT: store <4 x float> , ptr addrspace(1) @pv, align 16 +; CHECK-NEXT: store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16 ; CHECK-NEXT: ret void ; %i = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll index f1e2737b370ef0b..74651e10c7809f3 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll @@ -23,7 +23,7 @@ entry: define amdgpu_kernel void @memset_all_5(i64 %val) { ; CHECK-LABEL: @memset_all_5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> , i64 [[VAL:%.*]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index bf21ed6898ff466..c8e3f97f61515fb 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -441,7 +441,7 @@ define half @test_pow_fast_f16__y_13(half %x) { ; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) ; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x) ; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs) -; GCN: %__ylogx = fmul fast <2 x half> %__log2, +; GCN: %__ylogx = fmul fast <2 x half> %__log2, splat (half 0xH4A80) ; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx) ; GCN: %1 = tail call <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %x) define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) { diff --git a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll index 1e60261df13046a..29996d68040e73d 100644 --- a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll @@ -357,7 +357,7 @@ define void @broken_phi() { ; GFX906-NEXT: bb: ; GFX906-NEXT: br label [[BB1:%.*]] ; GFX906: bb1: -; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX906-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX906: bb2: ; GFX906-NEXT: br label [[BB3]] @@ -432,7 +432,7 @@ define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) { ; GFX906-NEXT: br label [[BB_1:%.*]] ; GFX906: bb.1: ; GFX906-NEXT: [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ] -; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ , [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ] +; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ] ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] ; GFX906: bb.2: ; GFX906-NEXT: br label [[BB_3]] @@ -514,7 +514,7 @@ define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) { ; GFX906-NEXT: entry: ; GFX906-NEXT: br label [[BB_1:%.*]] ; GFX906: bb.1: -; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ , [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ] +; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ] ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] ; GFX906: bb.2: ; GFX906-NEXT: br label [[BB_3]] diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll index f4a2a4a4521e995..a9a8f58963a374f 100644 --- a/llvm/test/CodeGen/ARM/vector-promotion.ll +++ b/llvm/test/CodeGen/ARM/vector-promotion.ll @@ -127,7 +127,7 @@ define i32 @unsupportedMultiUses(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -146,7 +146,7 @@ define void @udivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -165,7 +165,7 @@ define void @uremCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -184,7 +184,7 @@ define void @sdivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -203,7 +203,7 @@ define void @sremCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], splat (float 7.000000e+00) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store float [[RES]], ptr %dest @@ -222,7 +222,7 @@ define void @fdivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], splat (float 7.000000e+00) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store float [[RES]], ptr %dest @@ -342,7 +342,7 @@ define void @simpleOneInstructionPromotionFloat(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1 ; Vector version: -; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], splat (i32 1) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[OR]], i32 %idx ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll index 73279844e91b330..6eece708fa337d0 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll @@ -135,18 +135,18 @@ define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5:![0-9]+]] @@ -188,18 +188,18 @@ define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]) @@ -241,18 +241,18 @@ define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5]] diff --git a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll index 5c8eb2658436450..5502980a263b2e6 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll @@ -276,7 +276,7 @@ define dso_local i32 @baz() { ; CHECK-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[BAZ_VARARG:%.*]], align 16 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[BAZ_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP0]], align 16 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @variadics3(i32 noundef 1, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: ret i32 [[CALL]] diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll index 32aec9586c29cb2..58b2338650eaab8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll @@ -12,16 +12,16 @@ define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) +; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -58,15 +58,15 @@ define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -102,16 +102,16 @@ define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocaptur ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) +; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll index d723c2f6df1afd3..2cbbfc019ab4df8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll @@ -16,8 +16,8 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -102,8 +102,8 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -145,8 +145,8 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -194,10 +194,10 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -282,8 +282,8 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -328,8 +328,8 @@ define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -376,13 +376,13 @@ define void @negative_shl_non_commute(ptr noalias nocapture %A, ptr noalias noca ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I:%.*]] = shl nsw <8 x i64> [[DOTSPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i64> [[I]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> , <8 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -426,10 +426,10 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -477,10 +477,10 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8 @@ -548,37 +548,37 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] -; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32 @@ -655,10 +655,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ] ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]] -; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> , i32 2) -; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) -; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> , i32 2) -; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) +; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) +; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) ; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] ; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 ; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 @@ -677,19 +677,19 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F: bb2: ; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] ; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ , [[BB]] ], [ [[I16:%.*]], [[BB2]] ] -; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], +; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], splat (i64 10) ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], <2 x i64> [[I4]] ; ZVE32F-NEXT: [[I8:%.*]] = getelementptr inbounds ptr, ptr [[ARG1]], <2 x i64> [[I6]] -; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> , <2 x ptr> undef) -; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> , <2 x ptr> undef) +; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) +; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) ; ZVE32F-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] ; ZVE32F-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 ; ZVE32F-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8 ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 -; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], +; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] ; ZVE32F: bb18: @@ -738,8 +738,8 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]] -; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> , i32 2) -; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> , i32 2) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 @@ -758,15 +758,15 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; ZVE32F-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8 ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2 ; ZVE32F-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 -; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], +; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], splat (i64 10) ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], <2 x i64> [[I10]] ; ZVE32F-NEXT: [[I14:%.*]] = getelementptr inbounds ptr, ptr [[ARG]], <2 x i64> [[I12]] -; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> ) -; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> ) +; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> splat (i1 true)) +; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> splat (i1 true)) ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 -; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], +; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] ; ZVE32F: bb18: @@ -822,8 +822,8 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ] ; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]] ; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1 ; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]] @@ -931,8 +931,8 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr ; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ] ; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> , i32 16) -; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> , <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> splat (i1 true), i32 16) +; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> splat (i1 true), <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) ; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]] ; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1 ; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]] @@ -974,8 +974,8 @@ bb16: ; preds = %bb4, %bb define <8 x i8> @broadcast_ptr_base(ptr %a) { ; CHECK-LABEL: @broadcast_ptr_base( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> , <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> splat (i1 true), <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) ; CHECK-NEXT: ret <8 x i8> [[TMP1]] ; entry: @@ -995,15 +995,15 @@ define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture r ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i16> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], splat (i16 5) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], splat (i16 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll index 6342e288647c2fe..e70868d4ffeb036 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -119,7 +119,7 @@ define dso_local void @foo3(ptr noalias nocapture %A, ptr noalias nocapture read ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], splat (i32 32002) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) ; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] @@ -226,13 +226,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @inconsistent_tripcounts( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 -1) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -261,13 +281,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @overflow_in_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1073741824) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 1073741824) br label %vector.body @@ -296,13 +336,33 @@ for.cond.cleanup: } +; +define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_not_an_induction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[N:%.*]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -331,13 +391,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_wrong_step( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -369,13 +449,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_step_not_constant( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[N:%.*]] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -406,13 +506,49 @@ for.cond.cleanup: ret void } +; +define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @outerloop_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP24:%.*]] = icmp eq i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_PH_PREHEADER:%.*]] +; CHECK: vector.ph.preheader: +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[LSR_IV36:%.*]] = phi ptr [ [[B:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP37:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: [[LSR_IV31:%.*]] = phi ptr [ [[C:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP32:%.*]], [[FOR_COND_CLEANUP3]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP:%.*]], [[FOR_COND_CLEANUP3]] ] +; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3]] ], [ 0, [[VECTOR_PH_PREHEADER]] ] +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1025) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV38:%.*]] = phi ptr [ [[SCEVGEP39:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV36]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV33:%.*]] = phi ptr [ [[SCEVGEP34:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV31]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV28:%.*]] = phi ptr [ [[SCEVGEP29:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[J_025]], i32 4096) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV38]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV33]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD27]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP1]], ptr [[LSR_IV28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP29]] = getelementptr i32, ptr [[LSR_IV28]], i32 4 +; CHECK-NEXT: [[SCEVGEP34]] = getelementptr i32, ptr [[LSR_IV33]], i32 4 +; CHECK-NEXT: [[SCEVGEP39]] = getelementptr i32, ptr [[LSR_IV38]], i32 4 +; CHECK-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP3]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: [[INC11]] = add nuw i32 [[J_025]], 1 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 1 +; CHECK-NEXT: [[SCEVGEP32]] = getelementptr i32, ptr [[LSR_IV31]], i32 1 +; CHECK-NEXT: [[SCEVGEP37]] = getelementptr i32, ptr [[LSR_IV36]], i32 1 +; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i32 [[INC11]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR_COND_CLEANUP]], label [[VECTOR_PH]] ; -define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp24 = icmp eq i32 %N, 0 br i1 %cmp24, label %for.cond.cleanup, label %vector.ph.preheader diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll index b91800c48cec4f4..239372e1addb231 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll @@ -6,13 +6,13 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -65,13 +65,13 @@ end: define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block_commutedphi( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -124,15 +124,15 @@ end: define arm_aapcs_vfpcc void @push_out_mul_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , -; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> , -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , splat (i32 3) +; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> splat (i32 8), splat (i32 3) +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 96) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -195,8 +195,8 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl ; CHECK: vector.2.ph: ; CHECK-NEXT: br label [[VECTOR_2_BODY:%.*]] ; CHECK: vector.2.body: -; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], splat (i32 2) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -211,7 +211,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl ; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY_END]], label [[VECTOR_2_BODY]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP5]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: @@ -261,13 +261,13 @@ define arm_aapcs_vfpcc void @invariant_add(ptr noalias nocapture readonly %data, ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) ; CHECK-NEXT: [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]] ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1) ; CHECK-NEXT: [[L3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[L3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: diff --git a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll index c18ae82aeb71fac..7846f82ad54cd05 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll @@ -523,7 +523,7 @@ declare void @dummy(i64, i64, i64) define void @vectorPromotion() { ; OPTALL-LABEL: define void @vectorPromotion() { ; OPTALL-NEXT: entry: -; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, +; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8) ; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64> ; OPTALL-NEXT: ret void ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll index 4cc038f03ff2c36..8a1e3551b2741f9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; ; RUN: opt < %s -passes=msan -S | FileCheck %s ; @@ -14,7 +14,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable @@ -25,7 +25,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -58,7 +58,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -69,7 +69,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -102,7 +102,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -113,7 +113,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -146,7 +146,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -157,7 +157,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -189,7 +189,7 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -199,9 +199,9 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -217,7 +217,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -228,7 +228,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -258,7 +258,7 @@ define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -286,7 +286,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -297,7 +297,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -330,7 +330,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -341,7 +341,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -374,7 +374,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -385,7 +385,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -418,7 +418,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -429,7 +429,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -463,7 +463,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -474,7 +474,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -508,7 +508,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -519,7 +519,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -553,7 +553,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -564,7 +564,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -598,7 +598,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -609,7 +609,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -643,7 +643,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -654,7 +654,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -688,7 +688,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -699,7 +699,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -733,7 +733,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -744,7 +744,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -778,7 +778,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -789,7 +789,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -821,7 +821,7 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -831,9 +831,9 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -849,7 +849,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -860,7 +860,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -890,7 +890,7 @@ define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -941,7 +941,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -952,7 +952,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -985,7 +985,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -996,7 +996,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1029,7 +1029,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1040,7 +1040,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1073,7 +1073,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1084,7 +1084,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1116,7 +1116,7 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1126,9 +1126,9 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -1144,7 +1144,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1155,7 +1155,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1185,7 +1185,7 @@ define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1213,7 +1213,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1224,7 +1224,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1257,7 +1257,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1268,7 +1268,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1301,7 +1301,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1312,7 +1312,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1345,7 +1345,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1356,7 +1356,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1388,7 +1388,7 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1398,9 +1398,9 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -1416,7 +1416,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1427,7 +1427,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1457,7 +1457,7 @@ define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1485,7 +1485,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1496,7 +1496,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1530,7 +1530,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1541,7 +1541,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1575,7 +1575,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1586,7 +1586,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1620,7 +1620,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1631,7 +1631,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1665,7 +1665,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1676,7 +1676,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1710,7 +1710,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1721,7 +1721,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1755,7 +1755,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1766,7 +1766,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1800,7 +1800,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1811,7 +1811,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1867,7 +1867,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1878,7 +1878,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1911,7 +1911,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1922,7 +1922,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1955,7 +1955,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1966,7 +1966,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1999,7 +1999,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2010,7 +2010,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2043,7 +2043,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2054,7 +2054,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2087,7 +2087,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2098,7 +2098,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2131,7 +2131,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2142,7 +2142,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2176,7 +2176,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2187,7 +2187,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2221,7 +2221,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2232,7 +2232,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2266,7 +2266,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2277,7 +2277,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2311,7 +2311,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2322,7 +2322,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2354,7 +2354,7 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2364,9 +2364,9 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -2382,7 +2382,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2393,7 +2393,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2423,7 +2423,7 @@ define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2451,7 +2451,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2462,7 +2462,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2496,7 +2496,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2507,7 +2507,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2541,7 +2541,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2552,7 +2552,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2586,7 +2586,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2597,7 +2597,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2631,7 +2631,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2642,7 +2642,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2674,7 +2674,7 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2684,9 +2684,9 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -2702,7 +2702,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2713,7 +2713,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2743,7 +2743,7 @@ define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2792,7 +2792,7 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2802,9 +2802,9 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -2819,7 +2819,7 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2829,9 +2829,9 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -2846,7 +2846,7 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2856,9 +2856,9 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -2873,7 +2873,7 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2883,9 +2883,9 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -2900,7 +2900,7 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2910,9 +2910,9 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -2927,7 +2927,7 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2937,9 +2937,9 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -2954,7 +2954,7 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2964,9 +2964,9 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -2981,7 +2981,7 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2991,9 +2991,9 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3008,7 +3008,7 @@ define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3035,7 +3035,7 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3045,9 +3045,9 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -3062,7 +3062,7 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3072,9 +3072,9 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -3089,7 +3089,7 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3099,9 +3099,9 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -3116,7 +3116,7 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3126,9 +3126,9 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -3143,7 +3143,7 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3153,9 +3153,9 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -3170,7 +3170,7 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3180,9 +3180,9 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -3197,7 +3197,7 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3207,9 +3207,9 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -3224,7 +3224,7 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3234,9 +3234,9 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3251,7 +3251,7 @@ define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3278,7 +3278,7 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3288,9 +3288,9 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -3305,7 +3305,7 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3315,9 +3315,9 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -3332,7 +3332,7 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3342,9 +3342,9 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -3359,7 +3359,7 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3369,9 +3369,9 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -3386,7 +3386,7 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3396,9 +3396,9 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -3413,7 +3413,7 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3423,9 +3423,9 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -3440,7 +3440,7 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3450,9 +3450,9 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -3467,7 +3467,7 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3477,9 +3477,9 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3494,7 +3494,7 @@ define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3521,7 +3521,7 @@ define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3560,7 +3560,7 @@ define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3587,7 +3587,7 @@ define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3614,7 +3614,7 @@ define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3642,7 +3642,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3653,7 +3653,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3685,7 +3685,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3696,7 +3696,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3728,7 +3728,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3739,7 +3739,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3774,7 +3774,7 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3784,9 +3784,9 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3804,7 +3804,7 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3814,9 +3814,9 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3834,7 +3834,7 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3844,9 +3844,9 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3865,7 +3865,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3876,7 +3876,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3886,9 +3886,9 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> @@ -3911,7 +3911,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3922,7 +3922,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3932,9 +3932,9 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> @@ -3957,7 +3957,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3968,7 +3968,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3978,9 +3978,9 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> @@ -4021,7 +4021,7 @@ define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4048,7 +4048,7 @@ define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4075,7 +4075,7 @@ define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4104,7 +4104,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4115,7 +4115,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4147,7 +4147,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4158,7 +4158,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4190,7 +4190,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4201,7 +4201,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4252,7 +4252,7 @@ define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4279,7 +4279,7 @@ define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4306,7 +4306,7 @@ define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4334,7 +4334,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4345,7 +4345,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4377,7 +4377,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4388,7 +4388,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4420,7 +4420,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4431,7 +4431,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4482,7 +4482,7 @@ define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4509,7 +4509,7 @@ define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4536,7 +4536,7 @@ define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4564,7 +4564,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4575,7 +4575,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4607,7 +4607,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4618,7 +4618,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4650,7 +4650,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4661,7 +4661,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4712,7 +4712,7 @@ define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4739,7 +4739,7 @@ define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4766,7 +4766,7 @@ define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4794,7 +4794,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4805,7 +4805,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4837,7 +4837,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4848,7 +4848,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4880,7 +4880,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4891,7 +4891,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4942,7 +4942,7 @@ define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4969,7 +4969,7 @@ define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4996,7 +4996,7 @@ define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5024,7 +5024,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5035,7 +5035,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5067,7 +5067,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5078,7 +5078,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5110,7 +5110,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5121,7 +5121,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5172,7 +5172,7 @@ define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5199,7 +5199,7 @@ define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5226,7 +5226,7 @@ define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5254,7 +5254,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5265,7 +5265,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5297,7 +5297,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5308,7 +5308,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5340,7 +5340,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5351,7 +5351,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5387,7 +5387,7 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5399,9 +5399,9 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5417,7 +5417,7 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5429,9 +5429,9 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5447,7 +5447,7 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5459,9 +5459,9 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5477,7 +5477,7 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5487,13 +5487,13 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5510,7 +5510,7 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5520,13 +5520,13 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5543,7 +5543,7 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5553,13 +5553,13 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5583,7 +5583,7 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5595,9 +5595,9 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5613,7 +5613,7 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5648,7 +5648,7 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5660,9 +5660,9 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5678,7 +5678,7 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5688,9 +5688,9 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5705,7 +5705,7 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5717,9 +5717,9 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5736,7 +5736,7 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5748,9 +5748,9 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5765,9 +5765,9 @@ define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5781,7 +5781,7 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5793,9 +5793,9 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5811,7 +5811,7 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5823,9 +5823,9 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -5841,7 +5841,7 @@ define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5871,7 +5871,7 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5883,9 +5883,9 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5901,7 +5901,7 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5913,9 +5913,9 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5938,7 +5938,7 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5948,9 +5948,9 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; @@ -5965,7 +5965,7 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitiz ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5992,7 +5992,7 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6002,9 +6002,9 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -2)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -2)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; @@ -6019,7 +6019,7 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6031,9 +6031,9 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6049,7 +6049,7 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6061,9 +6061,9 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6079,7 +6079,7 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6091,9 +6091,9 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6109,7 +6109,7 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6121,9 +6121,9 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6138,9 +6138,9 @@ define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 2)) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> splat (i32 2)) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6154,7 +6154,7 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6164,9 +6164,9 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6181,7 +6181,7 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6193,9 +6193,9 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6211,7 +6211,7 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6223,9 +6223,9 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -6241,7 +6241,7 @@ define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6271,7 +6271,7 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6300,9 +6300,9 @@ define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6316,7 +6316,7 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6326,9 +6326,9 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 2)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 2)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6343,7 +6343,7 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6353,13 +6353,13 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6376,7 +6376,7 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6386,13 +6386,13 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6409,7 +6409,7 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6419,13 +6419,13 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6442,7 +6442,7 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6452,9 +6452,9 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6469,7 +6469,7 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6479,9 +6479,9 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -6496,7 +6496,7 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6506,9 +6506,9 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -6523,7 +6523,7 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6533,9 +6533,9 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -6550,7 +6550,7 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6560,9 +6560,9 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6577,7 +6577,7 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6587,9 +6587,9 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6604,7 +6604,7 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6614,9 +6614,9 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6631,7 +6631,7 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6641,9 +6641,9 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6658,7 +6658,7 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6668,9 +6668,9 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 8)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 8)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6685,7 +6685,7 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6695,9 +6695,9 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -6712,7 +6712,7 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6722,9 +6722,9 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -6739,7 +6739,7 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6749,9 +6749,9 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -6766,7 +6766,7 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6776,9 +6776,9 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6793,7 +6793,7 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6803,9 +6803,9 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6820,7 +6820,7 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6830,9 +6830,9 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6848,7 +6848,7 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6858,11 +6858,11 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6891,7 +6891,7 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6901,11 +6901,11 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6934,7 +6934,7 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6944,11 +6944,11 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6977,7 +6977,7 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6987,11 +6987,11 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7020,7 +7020,7 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7030,11 +7030,11 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7063,7 +7063,7 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7073,11 +7073,11 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7106,7 +7106,7 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7116,11 +7116,11 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7149,7 +7149,7 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7159,11 +7159,11 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7192,7 +7192,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7206,7 +7206,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP8]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7235,7 +7235,7 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7245,11 +7245,11 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7278,7 +7278,7 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7288,11 +7288,11 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7321,7 +7321,7 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7331,11 +7331,11 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7364,7 +7364,7 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7374,11 +7374,11 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7407,7 +7407,7 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7417,11 +7417,11 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7450,7 +7450,7 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7460,11 +7460,11 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7493,7 +7493,7 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7503,11 +7503,11 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7536,7 +7536,7 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7546,11 +7546,11 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7579,7 +7579,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7593,7 +7593,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP8]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7622,7 +7622,7 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7632,11 +7632,11 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7665,7 +7665,7 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7675,11 +7675,11 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7708,7 +7708,7 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7718,11 +7718,11 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7751,7 +7751,7 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7761,11 +7761,11 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7794,7 +7794,7 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7804,11 +7804,11 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7837,7 +7837,7 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7847,11 +7847,11 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7880,7 +7880,7 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7890,11 +7890,11 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7923,7 +7923,7 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7933,11 +7933,11 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7966,7 +7966,7 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7976,11 +7976,11 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8009,7 +8009,7 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8019,11 +8019,11 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8052,7 +8052,7 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8062,11 +8062,11 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8095,7 +8095,7 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8105,11 +8105,11 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8138,7 +8138,7 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8148,11 +8148,11 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8181,7 +8181,7 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8191,11 +8191,11 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8224,7 +8224,7 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8234,11 +8234,11 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8267,7 +8267,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8278,7 +8278,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8288,11 +8288,11 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] @@ -8316,7 +8316,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8327,7 +8327,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8337,11 +8337,11 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] @@ -8365,7 +8365,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8376,7 +8376,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8386,11 +8386,11 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] @@ -8414,7 +8414,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8425,7 +8425,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8435,11 +8435,11 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] @@ -8463,7 +8463,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8474,7 +8474,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8484,11 +8484,11 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] @@ -8512,7 +8512,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8523,7 +8523,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8533,11 +8533,11 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] @@ -8561,7 +8561,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8572,7 +8572,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8582,11 +8582,11 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] @@ -8610,7 +8610,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8621,7 +8621,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8631,11 +8631,11 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] @@ -8659,7 +8659,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8670,7 +8670,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8680,11 +8680,11 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] @@ -8708,7 +8708,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8719,7 +8719,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8729,11 +8729,11 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] @@ -8757,7 +8757,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8768,7 +8768,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8778,11 +8778,11 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] @@ -8806,7 +8806,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8817,7 +8817,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8827,11 +8827,11 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] @@ -8855,7 +8855,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8866,7 +8866,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8876,11 +8876,11 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] @@ -8904,7 +8904,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8915,7 +8915,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8925,11 +8925,11 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] @@ -8953,9 +8953,9 @@ define <8 x i16> @shll(<8 x i8> %in) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> ; CHECK-NEXT: [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 8) ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], +; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], splat (i16 8) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[RES]] ; @@ -8969,13 +8969,13 @@ define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory { ; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 16) ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], +; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], splat (i32 16) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; @@ -8992,7 +8992,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9003,7 +9003,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9018,7 +9018,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9040,7 +9040,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9051,7 +9051,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9066,7 +9066,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9088,7 +9088,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9099,7 +9099,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9114,7 +9114,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9136,7 +9136,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9147,7 +9147,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9162,7 +9162,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9184,7 +9184,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9195,7 +9195,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9210,7 +9210,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9232,7 +9232,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9243,7 +9243,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9258,7 +9258,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9280,7 +9280,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9291,7 +9291,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9306,7 +9306,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9328,7 +9328,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9339,7 +9339,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9354,7 +9354,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9411,7 +9411,7 @@ define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9444,7 +9444,7 @@ define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9477,7 +9477,7 @@ define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9510,7 +9510,7 @@ define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9543,7 +9543,7 @@ define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sani ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9576,7 +9576,7 @@ define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9609,7 +9609,7 @@ define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9675,7 +9675,7 @@ define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], splat (i32 3) ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSPROP]], i32 13) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13) @@ -9690,5 +9690,5 @@ entry: declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll index 1453e64abd5a222..b0c71dc8b0851ea 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll @@ -214,7 +214,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 @@ -353,7 +353,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 @@ -429,7 +429,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll index 421f00fcbc56bf7..deeb1d4b6ff8592 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll @@ -24,7 +24,7 @@ define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: unreachable @@ -47,9 +47,9 @@ define void @st2_8b_undefA(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -72,9 +72,9 @@ define void @st2_8b_undefB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -96,9 +96,9 @@ define void @st2_8b_undefAB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_ ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -125,7 +125,7 @@ define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind sani ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -149,9 +149,9 @@ define void @st3_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -175,9 +175,9 @@ define void @st3_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -201,9 +201,9 @@ define void @st3_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -226,9 +226,9 @@ define void @st3_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -251,9 +251,9 @@ define void @st3_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -276,9 +276,9 @@ define void @st3_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -300,9 +300,9 @@ define void @st3_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) noun ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -330,7 +330,7 @@ define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P) ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -355,9 +355,9 @@ define void @st4_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -382,9 +382,9 @@ define void @st4_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -409,9 +409,9 @@ define void @st4_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> , <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -436,9 +436,9 @@ define void @st4_8b_undefD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> , ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> splat (i8 -1), ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -462,9 +462,9 @@ define void @st4_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -488,9 +488,9 @@ define void @st4_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -514,9 +514,9 @@ define void @st4_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -540,9 +540,9 @@ define void @st4_8b_undefBD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], <8 x i8> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -565,9 +565,9 @@ define void @st4_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -590,9 +590,9 @@ define void @st4_8b_undefABD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -615,9 +615,9 @@ define void @st4_8b_undefACD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -640,9 +640,9 @@ define void @st4_8b_undefBCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -664,9 +664,9 @@ define void @st4_8b_undefABCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -698,7 +698,7 @@ define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind sanitize_memor ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -725,7 +725,7 @@ define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -753,7 +753,7 @@ define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -785,7 +785,7 @@ define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -812,7 +812,7 @@ define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -840,7 +840,7 @@ define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -872,7 +872,7 @@ define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -899,7 +899,7 @@ define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -927,7 +927,7 @@ define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -959,7 +959,7 @@ define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -986,7 +986,7 @@ define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1014,7 +1014,7 @@ define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1044,7 +1044,7 @@ define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1071,7 +1071,7 @@ define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1099,7 +1099,7 @@ define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1132,7 +1132,7 @@ define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1159,7 +1159,7 @@ define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1187,7 +1187,7 @@ define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1219,7 +1219,7 @@ define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1242,9 +1242,9 @@ define void @st2_2d_undefA(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1267,9 +1267,9 @@ define void @st2_2d_undefB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1291,9 +1291,9 @@ define void @st2_2d_undefAB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitiz ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1320,7 +1320,7 @@ define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1344,9 +1344,9 @@ define void @st3_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1370,9 +1370,9 @@ define void @st3_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1396,9 +1396,9 @@ define void @st3_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1421,9 +1421,9 @@ define void @st3_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1446,9 +1446,9 @@ define void @st3_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1471,9 +1471,9 @@ define void @st3_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1495,9 +1495,9 @@ define void @st3_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) n ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1525,7 +1525,7 @@ define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1554,9 +1554,9 @@ define void @st4_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1581,9 +1581,9 @@ define void @st4_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1608,9 +1608,9 @@ define void @st4_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1635,9 +1635,9 @@ define void @st4_2d_undefD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> , ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1661,9 +1661,9 @@ define void @st4_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1687,9 +1687,9 @@ define void @st4_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1713,9 +1713,9 @@ define void @st4_2d_undefAD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1739,9 +1739,9 @@ define void @st4_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1765,9 +1765,9 @@ define void @st4_2d_undefBD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1791,9 +1791,9 @@ define void @st4_2d_undefCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1816,9 +1816,9 @@ define void @st4_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1841,9 +1841,9 @@ define void @st4_2d_undefABD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1866,9 +1866,9 @@ define void @st4_2d_undefACD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1891,9 +1891,9 @@ define void @st4_2d_undefBCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1915,9 +1915,9 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1929,5 +1929,5 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64 ret void } ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index 3d5e4005de5b10c..5bb2c3f0e233b9c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -43,9 +43,9 @@ define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64> @@ -71,9 +71,9 @@ define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> @@ -305,7 +305,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -326,7 +326,7 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -347,7 +347,7 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -368,7 +368,7 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -389,7 +389,7 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -503,7 +503,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -527,7 +527,7 @@ define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -551,7 +551,7 @@ define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -575,7 +575,7 @@ define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -599,7 +599,7 @@ define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -627,7 +627,7 @@ define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -654,7 +654,7 @@ define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -681,7 +681,7 @@ define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -708,7 +708,7 @@ define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -792,7 +792,7 @@ define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -813,7 +813,7 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -905,7 +905,7 @@ define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -926,7 +926,7 @@ define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -964,7 +964,7 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -989,7 +989,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1009,7 +1009,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1032,7 +1032,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) # ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1050,7 +1050,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1065,7 +1065,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1091,7 +1091,7 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1363,11 +1363,11 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], -; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1) +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> ; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1376,7 +1376,7 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META1:![0-9]+]] +; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META2:![0-9]+]] ; CHECK-NEXT: ret void ; %a2 = add <2 x i64> %a1, @@ -1392,7 +1392,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1401,7 +1401,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META1]] +; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META2]] ; CHECK-NEXT: ret void ; tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind @@ -1418,7 +1418,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1427,7 +1427,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META1]] +; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META2]] ; CHECK-NEXT: ret void ; %a2 = fadd <4 x double> %a1, diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index 5efb7eb40789872..1602e85d8516d23 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -160,7 +160,7 @@ define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: unreachable @@ -216,7 +216,7 @@ define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] @@ -489,7 +489,7 @@ define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -684,7 +684,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -797,7 +797,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -817,7 +817,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -832,7 +832,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -885,9 +885,9 @@ define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] @@ -978,7 +978,7 @@ define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1002,7 +1002,7 @@ define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1026,7 +1026,7 @@ define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1050,7 +1050,7 @@ define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1074,7 +1074,7 @@ define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1102,7 +1102,7 @@ define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1129,7 +1129,7 @@ define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1156,7 +1156,7 @@ define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1183,7 +1183,7 @@ define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1438,7 +1438,7 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> ) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -1471,7 +1471,7 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> ) ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] ; @@ -1560,7 +1560,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1593,7 +1593,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1626,7 +1626,7 @@ define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1659,7 +1659,7 @@ define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1692,7 +1692,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1725,7 +1725,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1758,7 +1758,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1791,7 +1791,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1824,7 +1824,7 @@ define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1857,7 +1857,7 @@ define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1890,7 +1890,7 @@ define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1923,7 +1923,7 @@ define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1956,7 +1956,7 @@ define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1989,7 +1989,7 @@ define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2022,7 +2022,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2055,7 +2055,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2089,14 +2089,14 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, < ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable ; CHECK: 10: ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4) ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] ; CHECK: 11: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2136,7 +2136,7 @@ define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index e9323f6dd330833..9d075f7974cd91c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -162,9 +162,9 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) @@ -183,9 +183,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) @@ -206,9 +206,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -218,9 +218,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]]) @@ -243,9 +243,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) @@ -263,9 +263,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) @@ -283,9 +283,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -295,9 +295,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) @@ -317,9 +317,9 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) @@ -338,9 +338,9 @@ define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]]) @@ -361,9 +361,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]]) @@ -382,9 +382,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 @@ -395,9 +395,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) @@ -416,9 +416,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 @@ -429,9 +429,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) @@ -450,9 +450,9 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) @@ -471,9 +471,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) @@ -494,9 +494,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -506,9 +506,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) @@ -531,9 +531,9 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) @@ -552,9 +552,9 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]]) @@ -639,9 +639,9 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]]) @@ -810,9 +810,9 @@ define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]]) @@ -866,7 +866,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] @@ -1139,9 +1139,9 @@ define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 @@ -1365,9 +1365,9 @@ define void @clflush(ptr %p) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]]) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll index a71455821bd6bd5..f5e8452fd0d5856 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll @@ -11,9 +11,9 @@ define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> @@ -39,9 +39,9 @@ define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> @@ -112,7 +112,7 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -138,7 +138,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -158,7 +158,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -173,7 +173,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -226,9 +226,9 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] @@ -322,7 +322,7 @@ define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -343,7 +343,7 @@ define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -380,7 +380,7 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -407,7 +407,7 @@ define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll index 50065e259706594..5bb2c3f0e233b9c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll @@ -43,9 +43,9 @@ define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64> @@ -71,9 +71,9 @@ define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> @@ -1363,8 +1363,8 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], -; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1) +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> ; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll index 6d9a78be2c20973..1602e85d8516d23 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll @@ -216,7 +216,7 @@ define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] @@ -885,9 +885,9 @@ define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] @@ -1438,7 +1438,7 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> ) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -1471,7 +1471,7 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> ) ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll index 8b8ca74f8e6dcfe..9d075f7974cd91c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll @@ -866,7 +866,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll index 3a1e27d5a09d1c2..f5e8452fd0d5856 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll @@ -11,9 +11,9 @@ define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> @@ -39,9 +39,9 @@ define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> @@ -226,9 +226,9 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll index 24fde53eba63a93..ff37605acaddd98 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll @@ -42,7 +42,7 @@ define void @Store(ptr %p, <4 x i64> %v, <4 x i1> %mask) sanitize_memory { ; ADDR-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]] ; ADDR: 7: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]] ; ADDR-NEXT: unreachable @@ -112,7 +112,7 @@ define <4 x double> @Load(ptr %p, <4 x double> %v, <4 x i1> %mask) sanitize_memo ; ADDR-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[TMP1]] to i4 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; ADDR: 7: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -238,7 +238,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <16 x i64> [[TMP3]] to <16 x ptr> ; CHECK-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP4]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]]) ; CHECK-NEXT: [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]]) @@ -252,7 +252,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <16 x i1> [[MASK:%.*]], <16 x i64> [[TMP2]], <16 x i64> zeroinitializer ; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; ADDR-NEXT: [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], +; ADDR-NEXT: [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP6:%.*]] = inttoptr <16 x i64> [[TMP5]] to <16 x ptr> ; ADDR-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP6]], i32 4, <16 x i1> [[MASK]], <16 x i32> [[TMP3]]) ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP1]] to i16 @@ -260,7 +260,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ADDR-NEXT: [[TMP8:%.*]] = bitcast <16 x i64> [[_MSMASKEDPTRS]] to i1024 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP8]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; ADDR: 9: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -274,9 +274,9 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ORIGINS-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 136) to ptr), align 4 ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP3:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; ORIGINS-NEXT: [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], +; ORIGINS-NEXT: [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <16 x i64> [[TMP4]] to <16 x ptr> -; ORIGINS-NEXT: [[TMP6:%.*]] = add <16 x i64> [[TMP4]], +; ORIGINS-NEXT: [[TMP6:%.*]] = add <16 x i64> [[TMP4]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP7:%.*]] = inttoptr <16 x i64> [[TMP6]] to <16 x ptr> ; ORIGINS-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP5]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]]) ; ORIGINS-NEXT: [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]]) @@ -319,7 +319,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <8 x i64> [[TMP3]] to <8 x ptr> ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> [[MASK:%.*]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -332,7 +332,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> [[TMP2]], <8 x i64> zeroinitializer ; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ADDR-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], +; ADDR-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP6:%.*]] = inttoptr <8 x i64> [[TMP5]] to <8 x ptr> ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[MASK]]) ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[TMP1]] to i8 @@ -340,7 +340,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ADDR-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSMASKEDPTRS]] to i512 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; ADDR: 9: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -353,9 +353,9 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ORIGINS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP3:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ORIGINS-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], +; ORIGINS-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr> -; ORIGINS-NEXT: [[TMP6:%.*]] = add <8 x i64> [[TMP4]], +; ORIGINS-NEXT: [[TMP6:%.*]] = add <8 x i64> [[TMP4]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP7:%.*]] = inttoptr <8 x i64> [[TMP6]] to <8 x ptr> ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP5]], i32 8, <8 x i1> [[MASK:%.*]]) ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -369,7 +369,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; CHECK-LABEL: @ScatterNoSanitize( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -379,7 +379,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer ; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ADDR-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; ADDR-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK]]) ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -388,9 +388,9 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; ORIGINS-LABEL: @ScatterNoSanitize( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ORIGINS-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; ORIGINS-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> -; ORIGINS-NEXT: [[TMP4:%.*]] = add <8 x i64> [[TMP2]], +; ORIGINS-NEXT: [[TMP4:%.*]] = add <8 x i64> [[TMP2]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr> ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]]) ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -426,7 +426,7 @@ define <16 x float> @ExpandLoad(ptr %ptr, <16 x i1> %mask, <16 x float> %passthr ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ADDR: 8: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -504,7 +504,7 @@ define void @CompressStore(<16 x float> %value, ptr %ptr, <16 x i1> %mask) sanit ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ADDR: 8: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll index 8746f7f19023e4e..3ce200e2222df16 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll @@ -2086,12 +2086,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <2 x i64> [[TMP5]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) ; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; CHECK-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 @@ -2103,12 +2103,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; ORIGIN-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGIN-NEXT: call void @llvm.donothing() ; ORIGIN-NEXT: [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], -; ORIGIN-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], +; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -9223372036854775808) +; ORIGIN-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; ORIGIN-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], [[TMP5]] ; ORIGIN-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP4]], [[TMP1]] -; ORIGIN-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], -; ORIGIN-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], +; ORIGIN-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) +; ORIGIN-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], splat (i64 -9223372036854775808) ; ORIGIN-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]] ; ORIGIN-NEXT: [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; ORIGIN-NEXT: store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8 @@ -2121,12 +2121,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; CALLS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; CALLS-NEXT: call void @llvm.donothing() ; CALLS-NEXT: [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], -; CALLS-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], +; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -9223372036854775808) +; CALLS-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CALLS-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], [[TMP5]] ; CALLS-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP4]], [[TMP1]] -; CALLS-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], -; CALLS-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], +; CALLS-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) +; CALLS-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], splat (i64 -9223372036854775808) ; CALLS-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]] ; CALLS-NEXT: [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; CALLS-NEXT: store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8 @@ -2146,14 +2146,14 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; CHECK-SAME: <2 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> , [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <2 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP4]] ; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP8]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <2 x i32> , [[X]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; CHECK-NEXT: store <2 x i1> [[TMP16]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i1> [[TMP17]] ; @@ -2162,17 +2162,17 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; ORIGIN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; ORIGIN-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGIN-NEXT: call void @llvm.donothing() -; ORIGIN-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], -; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], +; ORIGIN-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; ORIGIN-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP3]], [[TMP4]] ; ORIGIN-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP3]], [[TMP1]] -; ORIGIN-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> , [[TMP6]] -; ORIGIN-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> , [[TMP5]] +; ORIGIN-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP6]] +; ORIGIN-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] ; ORIGIN-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; ORIGIN-NEXT: [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 ; ORIGIN-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 ; ORIGIN-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0 -; ORIGIN-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> , [[X]] +; ORIGIN-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; ORIGIN-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 ; ORIGIN-NEXT: store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4 ; ORIGIN-NEXT: ret <2 x i1> [[TMP21]] @@ -2182,17 +2182,17 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; CALLS-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CALLS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; CALLS-NEXT: call void @llvm.donothing() -; CALLS-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], -; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], +; CALLS-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CALLS-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP3]], [[TMP4]] ; CALLS-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP3]], [[TMP1]] -; CALLS-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> , [[TMP6]] -; CALLS-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> , [[TMP5]] +; CALLS-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP6]] +; CALLS-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] ; CALLS-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; CALLS-NEXT: [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 ; CALLS-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 ; CALLS-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0 -; CALLS-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> , [[X]] +; CALLS-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; CALLS-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 ; CALLS-NEXT: store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4 ; CALLS-NEXT: ret <2 x i1> [[TMP21]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/reduce.ll b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll index 669ccf97f74c3e7..54e87b9e10a509d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/reduce.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll @@ -74,7 +74,7 @@ define i32 @reduce_or() sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <3 x i32>, ptr [[TMP3]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = xor <3 x i32> [[O]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <3 x i32> [[O]], splat (i32 -1) ; CHECK-NEXT: [[TMP8:%.*]] = or <3 x i32> [[TMP7]], [[_MSLD]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[_MSLD]]) diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll index 98a9cdc69604944..05d4d2a6551f5ed 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -S -passes="msan" -msan-instrumentation-with-call-threshold=0 | FileCheck %s ; ; This test illustrates a bug in MemorySanitizer that will shortly be fixed @@ -16,7 +16,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA1:![0-9]+]] ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576), i64 35184372088832) to ptr), align 8 ; CHECK-NEXT: br label %[[FOR_COND:.*]] @@ -24,10 +24,10 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi <4 x i16> [ [[_MSLD]], %[[ENTRY]] ], [ [[_MSLD3:%.*]], %[[FOR_COND]] ] ; CHECK-NEXT: [[_MSPHI_O:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[FOR_COND]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i16> [ [[DOTPRE]], %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FOR_COND]] ] -; CHECK-NEXT: [[_MSPHI_S1:%.*]] = phi <4 x i16> [ , %[[ENTRY]] ], [ [[_MSLD3]], %[[FOR_COND]] ] +; CHECK-NEXT: [[_MSPHI_S1:%.*]] = phi <4 x i16> [ splat (i16 -1), %[[ENTRY]] ], [ [[_MSLD3]], %[[FOR_COND]] ] ; CHECK-NEXT: [[_MSPHI_O2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP11]], %[[FOR_COND]] ] ; CHECK-NEXT: [[E_0:%.*]] = phi <4 x i16> [ undef, %[[ENTRY]] ], [ [[TMP5]], %[[FOR_COND]] ] -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSPHI_S1]], <4 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSPHI_S1]], <4 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[E_0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[_MSPHI_S]] to i64 ; CHECK-NEXT: call void @__msan_maybe_warning_8(i64 zeroext [[TMP2]], i32 zeroext [[_MSPHI_O]]) @@ -36,7 +36,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i32 @_Z1b11__Int16x4_tS_(<4 x i16> noundef [[TMP1]], <4 x i16> noundef [[LANE]]) ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[CONV]] to ptr -; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA1]] ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr @@ -47,8 +47,8 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: store <4 x i16> [[_MSLD3]], ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64 ; CHECK-NEXT: call void @__msan_maybe_store_origin_8(i64 zeroext [[TMP12]], ptr @_Z1cv, i32 zeroext [[TMP11]]) -; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA1]] +; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: %.pre = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa !2 @@ -76,9 +76,9 @@ attributes #0 = { mustprogress noreturn nounwind sanitize_memory "no-trapping-ma !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} -; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} -; CHECK: [[META2]] = !{!"Simple C++ TBAA"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} +; CHECK: [[META5]] = !{!"llvm.loop.mustprogress"} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index 0e2c0e3d8594151..d614bb85d858499 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -65,7 +65,7 @@ define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_mem ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[C:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A]], <16 x i8> [[B]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[C]] diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll index 2131162bf4bf3f8..03c5e917f07650d 100644 --- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll @@ -134,8 +134,8 @@ define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitiz ; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double> ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]] ; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0) @@ -396,8 +396,8 @@ define void @load_add_store_vector(<2 x float>* %a) sanitize_numerical_stability ; CHECK-NEXT: br label [[TMP6]] ; CHECK: 6: ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] -; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], +; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 2) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[C]], i64 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP8]], i64 0 @@ -772,8 +772,8 @@ define void @vector_shuffle(<2 x float> %0) sanitize_numerical_stability { ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] ; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> , <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> splat (float 1.000000e+00), <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> splat (double 1.000000e+00), <2 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll index f26463815c16d68..418ad0da26a95f4 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll @@ -200,8 +200,8 @@ define i32 @f16_i8(half %in) { define <2 x i64> @v2f32_i32(<2 x float> %in) { ; CHECK-BASE-LABEL: @v2f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <2 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v2f32_i32( @@ -223,8 +223,8 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { define <4 x i64> @v4f32_i32(<4 x float> %in) { ; CHECK-BASE-LABEL: @v4f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <4 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v4f32_i32( @@ -246,8 +246,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { define <8 x i64> @v8f32_i32(<8 x float> %in) { ; CHECK-BASE-LABEL: @v8f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <8 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v8f32_i32( @@ -269,8 +269,8 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { define <4 x i32> @v4f16_i16(<4 x half> %in) { ; CHECK-BASE-LABEL: @v4f16_i16( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> splat (i32 32767)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> splat (i32 -32768)) ; CHECK-BASE-NEXT: ret <4 x i32> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v4f16_i16( @@ -292,8 +292,8 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) { define <8 x i32> @v8f16_i16(<8 x half> %in) { ; CHECK-BASE-LABEL: @v8f16_i16( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768)) ; CHECK-BASE-NEXT: ret <8 x i32> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v8f16_i16( diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll index 4050da245c88df1..252f31ec6874766 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll @@ -159,8 +159,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { define <8 x i32> @v8f16_i16(<8 x half> %in) { ; CHECK-LABEL: @v8f16_i16( ; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767)) +; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768)) ; CHECK-NEXT: ret <8 x i32> [[MAX]] ; %conv = fptosi <8 x half> %in to <8 x i32> diff --git a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll index 9eb7ef1a1d46ee2..5c914deb089671f 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll @@ -18,7 +18,7 @@ define i32 @anyset_two_bit_mask(i32 %x) { define <2 x i32> @anyset_two_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_two_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 9) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -48,7 +48,7 @@ define i32 @anyset_four_bit_mask(i32 %x) { define <2 x i32> @anyset_four_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_four_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 297) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -83,7 +83,7 @@ define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) { define <2 x i32> @anyset_three_bit_mask_all_shifted_bits_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 296) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -114,8 +114,8 @@ define i32 @allset_two_bit_mask(i32 %x) { define <2 x i32> @allset_two_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @allset_two_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 129) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 129) ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll index f6369a95bccf928..4a89705e17749cf 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll @@ -19,7 +19,7 @@ define signext i32 @popcount8(i8 zeroext %0) { ; CHECK-NEXT: [[TMP9:%.*]] = lshr i8 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15 -; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: ret i32 [[TMP12]] ; %2 = lshr i8 %0, 1 @@ -32,8 +32,8 @@ define signext i32 @popcount8(i8 zeroext %0) { %9 = lshr i8 %8, 4 %10 = add nuw nsw i8 %9, %8 %11 = and i8 %10, 15 - %12 = zext i8 %11 to i32 - ret i32 %12 + %12 = zext i8 %11 to i32 + ret i32 %12 } ;int popcount32(unsigned i) { @@ -98,7 +98,7 @@ define signext i32 @popcount64(i64 %0) { ; y <<= 64; ; y |= 0x3333333333333333; ; __uint128_t z = 0x0f0f0f0f0f0f0f0f; -; z <<= 64; +; z <<= 64; ; z |= 0x0f0f0f0f0f0f0f0f; ; __uint128_t a = 0x0101010101010101; ; a <<= 64; @@ -112,7 +112,7 @@ define signext i32 @popcount64(i64 %0) { define signext i32 @popcount128(i128 %0) { ; CHECK-LABEL: @popcount128( ; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP0:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = lshr i128 %0, 1 @@ -126,9 +126,9 @@ define signext i32 @popcount128(i128 %0) { %10 = add nuw nsw i128 %9, %8 %11 = and i128 %10, 20016609818878733144904388672456953615 %12 = mul i128 %11, 1334440654591915542993625911497130241 - %13 = lshr i128 %12, 120 - %14 = trunc i128 %13 to i32 - ret i32 %14 + %13 = lshr i128 %12, 120 + %14 = trunc i128 %13 to i32 + ret i32 %14 } ;vector unsigned char popcount8vec(vector unsigned char i) @@ -140,16 +140,16 @@ define signext i32 @popcount128(i128 %0) { ;} define <16 x i8> @popcount8vec(<16 x i8> %0) { ; CHECK-LABEL: @popcount8vec( -; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 85) ; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], splat (i8 51) +; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], splat (i8 2) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], splat (i8 51) ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], splat (i8 4) ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], splat (i8 15) ; CHECK-NEXT: ret <16 x i8> [[TMP11]] ; %2 = lshr <16 x i8> %0, diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll index b54963eda9d1f40..0a5c710a8b70ab8 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll @@ -116,7 +116,7 @@ define void @multi_uses_sub(i32 %X, i32 %Y) { define void @multi_use_vec_add(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_add( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -135,7 +135,7 @@ define void @multi_use_vec_add(<2 x i32> %X) { define void @multi_use_vec_or(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_or( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -154,7 +154,7 @@ define void @multi_use_vec_or(<2 x i32> %X) { define void @multi_use_vec_xor(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_xor( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -173,7 +173,7 @@ define void @multi_use_vec_xor(<2 x i32> %X) { define void @multi_use_vec_and(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_and( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) diff --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll index 170fde4ef7002e0..65595962001a43d 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll @@ -11,7 +11,7 @@ define <8 x i16> @or-load-fixed-length-vector(ptr %p1) { ; CHECK-NEXT: [[L2:%.*]] = load <8 x i8>, ptr [[P2]], align 1 ; CHECK-NEXT: [[E1:%.*]] = zext <8 x i8> [[L1]] to <8 x i16> ; CHECK-NEXT: [[E2:%.*]] = zext <8 x i8> [[L2]] to <8 x i16> -; CHECK-NEXT: [[S2:%.*]] = shl <8 x i16> [[E2]], +; CHECK-NEXT: [[S2:%.*]] = shl <8 x i16> [[E2]], splat (i16 8) ; CHECK-NEXT: [[OR:%.*]] = or <8 x i16> [[E1]], [[S2]] ; CHECK-NEXT: ret <8 x i16> [[OR]] ; diff --git a/llvm/test/Transforms/Attributor/nofpclass-powi.ll b/llvm/test/Transforms/Attributor/nofpclass-powi.ll index 176f770db0a5843..1d59907a34f19af 100644 --- a/llvm/test/Transforms/Attributor/nofpclass-powi.ll +++ b/llvm/test/Transforms/Attributor/nofpclass-powi.ll @@ -104,7 +104,7 @@ define float @ret_powi_f32_masked_to_even_extrabits(float %arg0, i32 %arg1) #0 { define <2 x float> @ret_powi_v2f32_masked_to_even(<2 x float> %arg0, <2 x i32> %arg1) #0 { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) <2 x float> @ret_powi_v2f32_masked_to_even ; CHECK-SAME: (<2 x float> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], +; CHECK-NEXT: [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], splat (i32 -2) ; CHECK-NEXT: [[CALL:%.*]] = call nofpclass(ninf nzero nsub nnorm) <2 x float> @llvm.powi.v2f32.v2i32(<2 x float> [[ARG0]], <2 x i32> [[KNOWN_EVEN]]) #[[ATTR6]] ; CHECK-NEXT: ret <2 x float> [[CALL]] ; @@ -277,7 +277,7 @@ define <4 x float> @powi_v4f32_i32_regression(<4 x float> %arg) { ; CHECK-LABEL: define nofpclass(nzero) <4 x float> @powi_v4f32_i32_regression ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR5]] { ; CHECK-NEXT: [[POWI:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[ARG]], i32 noundef 4) #[[ATTR6]] -; CHECK-NEXT: [[USER:%.*]] = fsub <4 x float> [[POWI]], +; CHECK-NEXT: [[USER:%.*]] = fsub <4 x float> [[POWI]], splat (float 1.000000e+00) ; CHECK-NEXT: ret <4 x float> [[USER]] ; %powi = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %arg, i32 4) diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 2a6780b60211cfd..5afe5e90d802bc6 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -96,7 +96,7 @@ define <2 x double> @returned_zero_vector() { define <2 x double> @returned_negzero_vector() { ; CHECK-LABEL: define noundef nofpclass(nan inf pzero sub norm) <2 x double> @returned_negzero_vector() { ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; call void @unknown() ret <2 x double> @@ -2909,7 +2909,7 @@ define <2 x float> @bitcast_to_float_vect_nnan(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) <2 x float> @bitcast_to_float_vect_nnan ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 4) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[SHR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; @@ -2922,7 +2922,7 @@ define <2 x float> @bitcast_to_float_vect_sign_1(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) <2 x float> @bitcast_to_float_vect_sign_1 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 -2147483648) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; @@ -2935,7 +2935,7 @@ define <2 x float> @bitcast_to_float_vect_nan(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(inf zero sub norm) <2 x float> @bitcast_to_float_vect_nan ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 2139095041) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll index 70793ec5c7f833f..24bf4938ff2d4a6 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll @@ -60,9 +60,9 @@ define i32 @vec_write_3() { ; CHECK-LABEL: define {{[^@]+}}@vec_write_3 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 16 -; CHECK-NEXT: store <2 x i32> , ptr [[A]], align 16 +; CHECK-NEXT: store <2 x i32> splat (i32 3), ptr [[A]], align 16 ; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A]], i64 1 -; CHECK-NEXT: store <2 x i32> , ptr [[G]], align 8 +; CHECK-NEXT: store <2 x i32> splat (i32 5), ptr [[G]], align 8 ; CHECK-NEXT: [[J:%.*]] = getelementptr i32, ptr [[G]], i64 1 ; CHECK-NEXT: [[L2B:%.*]] = load i32, ptr [[G]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = add i32 3, [[L2B]] diff --git a/llvm/test/Transforms/BDCE/binops-multiuse.ll b/llvm/test/Transforms/BDCE/binops-multiuse.ll index 0c03ca4d6fc57e4..950a01bcf33ef54 100644 --- a/llvm/test/Transforms/BDCE/binops-multiuse.ll +++ b/llvm/test/Transforms/BDCE/binops-multiuse.ll @@ -243,8 +243,8 @@ define void @select_vectorized(i1 %c, <2 x i8> %a, <2 x i8> %b) { ; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], <2 x i8> [[A]], <2 x i8> [[B]] -; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], -; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], +; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], splat (i8 4) +; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], splat (i8 12) ; CHECK-NEXT: call void @use3(<2 x i8> [[RET1]]) ; CHECK-NEXT: call void @use3(<2 x i8> [[RET2]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/BDCE/dead-uses.ll b/llvm/test/Transforms/BDCE/dead-uses.ll index 85ee0dd8f2b9048..5c1677e59a8b881 100644 --- a/llvm/test/Transforms/BDCE/dead-uses.ll +++ b/llvm/test/Transforms/BDCE/dead-uses.ll @@ -27,11 +27,11 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; First fshr operand is dead (vector variant). define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> ) -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> splat (i32 1)) +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 23) ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] -; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], +; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[E]] ; %x = or <2 x i32> %a, diff --git a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll index ccf979d4fa12a2f..8527e4816009370 100644 --- a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll +++ b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll @@ -5,11 +5,11 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_basic( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], +; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a2 = add <2 x i32> %a, @@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; Going vector -> scalar define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_extractelement( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] ; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0 ; CHECK-NEXT: [[E:%.*]] = ashr i32 [[D]], 3 @@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; Going scalar -> vector define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; CHECK-LABEL: @test_insertelement( -; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, +; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) ; CHECK-NEXT: [[Y:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1 -; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], +; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8) ; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]] -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %x = insertelement <2 x i32> poison, i32 %a, i32 0 @@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; Some non-int vectors and conversions define <2 x i32> @test_conversion(<2 x i32> %a) { ; CHECK-LABEL: @test_conversion( -; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], +; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2) ; CHECK-NEXT: [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double> -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32> -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %a2 = add <2 x i32> %a, @@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) { define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) { ; CHECK-LABEL: @test_assumption_invalidation( ; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> -; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]] ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[TRUNC]] diff --git a/llvm/test/Transforms/BDCE/vectors.ll b/llvm/test/Transforms/BDCE/vectors.ll index ba5ac9da63b0b45..3c50f9ca5976ed6 100644 --- a/llvm/test/Transforms/BDCE/vectors.ll +++ b/llvm/test/Transforms/BDCE/vectors.ll @@ -5,11 +5,11 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_basic( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], +; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a2 = add <2 x i32> %a, @@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; Going vector -> scalar define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_extractelement( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] ; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0 ; CHECK-NEXT: [[E:%.*]] = ashr i32 [[D]], 3 @@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; Going scalar -> vector define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; CHECK-LABEL: @test_insertelement( -; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, +; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) ; CHECK-NEXT: [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1 -; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], +; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8) ; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]] -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %x = insertelement <2 x i32> undef, i32 %a, i32 0 @@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; Some non-int vectors and conversions define <2 x i32> @test_conversion(<2 x i32> %a) { ; CHECK-LABEL: @test_conversion( -; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], +; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2) ; CHECK-NEXT: [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double> -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32> -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %a2 = add <2 x i32> %a, @@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) { define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) { ; CHECK-LABEL: @test_assumption_invalidation( ; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> -; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]] ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[TRUNC]] diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll index 33d18d0e2a795bd..7abc32e4f1cd852 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll @@ -327,10 +327,10 @@ define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind { ; CHECK-NEXT: [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], %[[FOR_BODY]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: tail call void @use.2xi64(<2 x i64> [[REM]]) -; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], +; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], splat (i64 1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], [[REM_AMT]] ; CHECK-NEXT: [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i64> zeroinitializer, <2 x i64> [[TMP1]] -; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], +; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], splat (i64 1) ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll index 124ce321e1819d6..6ef3400812fc8e0 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll @@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux-gnu" define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) { ; CHECK-LABEL: @splat_base( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]] -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0 @@ -27,7 +27,7 @@ define <4 x i32> @splat_struct(ptr %base) { ; CHECK-LABEL: @splat_struct( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1 @@ -39,7 +39,7 @@ define <4 x i32> @scalar_index(ptr %base, i64 %index) { ; CHECK-LABEL: @scalar_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0 @@ -53,7 +53,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { ; CHECK-LABEL: @splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0 @@ -66,7 +66,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { define <4 x i32> @test_global_array(<4 x i64> %indxs) { ; CHECK-LABEL: @test_global_array( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]] -; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[G]] ; %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs @@ -76,7 +76,7 @@ define <4 x i32> @test_global_array(<4 x i64> %indxs) { define <4 x i32> @global_struct_splat() { ; CHECK-LABEL: @global_struct_splat( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x ptr> poison, ptr @c, i32 0 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll index 6c9c844a4ebdb68..832870839302998 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -12,7 +12,7 @@ target triple = "x86_64-unknown-linux-gnu" define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) { ; CHECK-LABEL: @splat_base( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]] -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0 @@ -26,7 +26,7 @@ define <4 x i32> @splat_struct(ptr %base) { ; CHECK-LABEL: @splat_struct( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1 @@ -38,7 +38,7 @@ define <4 x i32> @scalar_index(ptr %base, i64 %index) { ; CHECK-LABEL: @scalar_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0 @@ -52,7 +52,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { ; CHECK-LABEL: @splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0 @@ -65,7 +65,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { define <4 x i32> @test_global_array(<4 x i64> %indxs) { ; CHECK-LABEL: @test_global_array( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]] -; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[G]] ; %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs @@ -75,7 +75,7 @@ define <4 x i32> @test_global_array(<4 x i64> %indxs) { define <4 x i32> @global_struct_splat() { ; CHECK-LABEL: @global_struct_splat( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x ptr> undef, ptr @c, i32 0 diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll index 6d8890d71e2be95..25cd5526f89a368 100644 --- a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll +++ b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll @@ -123,17 +123,11 @@ define i64 @sdiv_minsize(i64 %a) minsize { } define <2 x i64> @sdiv_v2i64(<2 x i64> %a) { -; CV-LABEL: define <2 x i64> @sdiv_v2i64( -; CV-SAME: <2 x i64> [[A:%.*]]) { -; CV-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], -; CV-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], -; CV-NEXT: ret <2 x i64> [[TMP2]] -; -; CI-LABEL: define <2 x i64> @sdiv_v2i64( -; CI-SAME: <2 x i64> [[A:%.*]]) { -; CI-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087) -; CI-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087) -; CI-NEXT: ret <2 x i64> [[TMP2]] +; CHECK-LABEL: define <2 x i64> @sdiv_v2i64( +; CHECK-SAME: <2 x i64> [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = sdiv <2 x i64> %a, %2 = add <2 x i64> %1, diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index 88df92fbada075e..b21e94b2feb3f48 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -16,7 +16,7 @@ define <2 x i1> @test.vectorgep(<2 x ptr> %vec) { define <2 x i1> @test.vectorgep.ult.true(<2 x ptr> %vec) { ; CHECK-LABEL: @test.vectorgep.ult.true( ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 %t.1 = icmp ult <2 x ptr> %vec, %gep.1 diff --git a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll index 3fa0e18ec5cf2cb..e87439fbaf3b087 100644 --- a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll +++ b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll @@ -19,7 +19,7 @@ define void @test_vector_iv(i32 %x, i1 %c) { ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 9 ; CHECK-NEXT: call void @use(i1 [[C_3]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], splat (i8 1) ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i8> [[IV_NEXT]], i8 2 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[E]], 100 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index 200793918f0ef27..57c9f8926b524f3 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -1248,9 +1248,9 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) { define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @non_const_range_minmax_vec( -; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) -; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> ) -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 10)) +; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> splat (i8 11)) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> ) %b2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %b, <2 x i8> ) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll index 669527ed88fa2fd..5122f98f4c82fdf 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll @@ -715,7 +715,7 @@ define { i8, i1 } @signed_mul_constant_folding() { define { <2 x i32>, <2 x i1> } @uaddo_vec(<2 x i32> %a) { ; CHECK-LABEL: @uaddo_vec( -; CHECK-NEXT: [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[ADD]] ; %add = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> ) @@ -909,7 +909,7 @@ cont: define <2 x i8> @uadd_sat_vec(<2 x i8> %a) { ; CHECK-LABEL: @uadd_sat_vec( -; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll index bda270e99bc2e6f..37eb4d9c978ec8e 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll @@ -4,8 +4,8 @@ define <2 x i1> @cmp1(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp1( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 1) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, splat (i8 1) %cmp = icmp ne <2 x i8> %add, zeroinitializer @@ -15,8 +15,8 @@ define <2 x i1> @cmp1(<2 x i8> %a) { define <2 x i1> @cmp2(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp2( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 5) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, splat (i8 5) %cmp = icmp ugt <2 x i8> %add, splat (i8 2) @@ -27,7 +27,7 @@ define <2 x i1> @cmp_nonsplat(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp_nonsplat( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, %cmp = icmp ugt <2 x i8> %add, @@ -51,7 +51,7 @@ define <2 x i1> @cmp_signedness(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp_signedness( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], splat (i16 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -63,7 +63,7 @@ define <2 x i16> @infer_nowrap(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @infer_nowrap( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -134,7 +134,7 @@ define <2 x i16> @saturating(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @saturating( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -146,7 +146,7 @@ define {<2 x i16>, <2 x i1>} @with_overflow(<2 x i8> %a) { ; CHECK-LABEL: define { <2 x i16>, <2 x i1> } @with_overflow( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: [[RES:%.*]] = insertvalue { <2 x i16>, <2 x i1> } { <2 x i16> poison, <2 x i1> zeroinitializer }, <2 x i16> [[RES1]], 0 ; CHECK-NEXT: ret { <2 x i16>, <2 x i1> } [[RES]] ; @@ -160,7 +160,7 @@ define <2 x i16> @srem1(<2 x i8> %a) { ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: [[RES1_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8> -; CHECK-NEXT: [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], +; CHECK-NEXT: [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], splat (i8 42) ; CHECK-NEXT: [[RES:%.*]] = zext <2 x i8> [[RES12]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[RES]] ; @@ -174,7 +174,7 @@ define <2 x i16> @srem2(<2 x i8> %a) { ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = sext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: [[RES_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8> -; CHECK-NEXT: [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], +; CHECK-NEXT: [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], splat (i8 42) ; CHECK-NEXT: [[RES:%.*]] = sext <2 x i8> [[RES1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[RES]] ; @@ -187,7 +187,7 @@ define <2 x i16> @ashr(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 0, 128) <2 x i16> @ashr( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -247,10 +247,10 @@ define <4 x i64> @issue_97674_getConstantOnEdge(i1 %cond) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] ; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, +; CHECK-NEXT: [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, splat (i64 1) ; CHECK-NEXT: br label %[[IF_END]] ; CHECK: [[IF_END]]: -; CHECK-NEXT: [[R:%.*]] = phi <4 x i64> [ , %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi <4 x i64> [ splat (i64 1), %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: ret <4 x i64> [[R]] ; entry: @@ -333,7 +333,7 @@ define <2 x i1> @insertelement_fold1() { ; CHECK-LABEL: define <2 x i1> @insertelement_fold1() { ; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 10, i64 0 ; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 20, i64 1 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ie1 = insertelement <2 x i32> poison, i32 10, i64 0 %ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1 @@ -346,7 +346,7 @@ define <2 x i1> @insertelement_fold1() { define <2 x i1> @insertelement_fold2() { ; CHECK-LABEL: define <2 x i1> @insertelement_fold2() { ; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> , i32 10, i64 0 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ie1 = insertelement <2 x i32> , i32 10, i64 0 %icmp1 = icmp slt <2 x i32> %ie1, diff --git a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll index 8b257f86caf6c05..9f6c21450d040e7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll @@ -6,9 +6,9 @@ define <4 x i32> @bar(<4 x i32> %arg, ptr %arg1) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 5, ptr [[ARG1:%.*]] -; CHECK-NEXT: [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> , i8 1) -; CHECK-NEXT: store i32 10, ptr [[ARG1]] +; CHECK-NEXT: store i32 5, ptr [[ARG1:%.*]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> splat (i32 -1), i8 1) +; CHECK-NEXT: store i32 10, ptr [[ARG1]], align 4 ; CHECK-NEXT: ret <4 x i32> [[TMP]] ; bb: diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index f0f52d56a9ff076..7169d9f90c66686 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -53,8 +53,8 @@ b0: define dllexport i32 @f1(ptr %a, <4 x i8> %v1, <4 x i32> %v2) { ; CHECK-LABEL: @f1( -; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> ) -; CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> ) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> splat (i1 true)) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret i32 0 ; tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> ) @@ -64,8 +64,8 @@ define dllexport i32 @f1(ptr %a, <4 x i8> %v1, <4 x i32> %v2) { define dllexport i32 @f2(ptr %a, <4 x i8> %v1, <4 x i32> %v2, <4 x i1> %mask) { ; CHECK-LABEL: @f2( -; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> [[MASK:%.*]]) -; CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> [[MASK:%.*]]) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> [[MASK]]) ; CHECK-NEXT: ret i32 0 ; tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> %mask) diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll index fbab350008f4ee7..bdc023128c85201 100644 --- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll @@ -79,7 +79,7 @@ define void @VectorTestPartiallyOverlapping(ptr %arg, i32 %i) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I:%.*]] to i64 ; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]] -; CHECK-NEXT: store <2 x float> , ptr [[I3]], align 16 +; CHECK-NEXT: store <2 x float> splat (float 1.000000e+00), ptr [[I3]], align 16 ; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64 ; CHECK-NEXT: [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]] diff --git a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll index d01ded9ebbfda83..ef2df8a3d678a98 100644 --- a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll +++ b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll @@ -74,8 +74,8 @@ define i32 @poison_does_not_freeze_signed(ptr %p, i32 noundef %x, i32 noundef %y define <4 x i8> @poison_does_not_freeze_vector(ptr %p, <4 x i8> noundef %x, <4 x i8> noundef %y) { ; CHECK-LABEL: define <4 x i8> @poison_does_not_freeze_vector( ; CHECK-SAME: ptr [[P:%.*]], <4 x i8> noundef [[X:%.*]], <4 x i8> noundef [[Y:%.*]]) { -; CHECK-NEXT: [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], -; CHECK-NEXT: [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], +; CHECK-NEXT: [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], splat (i8 5) +; CHECK-NEXT: [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], splat (i8 1) ; CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i8> [[X2]], [[Y2]] ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[DIV]], [[Y2]] ; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub <4 x i8> [[X2]], [[TMP1]] diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index 1cf7ddda7f0dd7f..edafeccd3c8cc4c 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -483,9 +483,9 @@ define i32 @select_not_cond(i1 %cond, i32 %t, i32 %f) { define <2 x double> @select_not_cond_commute_vec(<2 x i1> %cond, <2 x double> %t, <2 x double> %f) { ; CHECK-LABEL: @select_not_cond_commute_vec( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[COND:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[COND:%.*]], splat (i1 true) ; CHECK-NEXT: [[M1:%.*]] = select <2 x i1> [[COND]], <2 x double> [[T:%.*]], <2 x double> [[F:%.*]] -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %not = xor <2 x i1> %cond, %m1 = select <2 x i1> %cond, <2 x double> %t, <2 x double> %f @@ -833,7 +833,7 @@ define float @maxnum(float %a, float %b) { define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @minnum( ; CHECK-NEXT: [[X:%.*]] = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %x = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) %y = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %b, <2 x float> %a) @@ -844,7 +844,7 @@ define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) { define <2 x double> @maximum(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @maximum( ; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %x = call fast <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) %y = call <2 x double> @llvm.maximum.v2f64(<2 x double> %b, <2 x double> %a) @@ -1109,7 +1109,7 @@ define float @fma_different_add_ops(float %a, float %b, float %c, float %d) { define <2 x double> @fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd( ; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %x = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) %y = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %a, <2 x double> %c) diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll index 499b5ac8de0af9a..282cb94d8c5f20e 100644 --- a/llvm/test/Transforms/EarlyCSE/gep.ll +++ b/llvm/test/Transforms/EarlyCSE/gep.ll @@ -13,7 +13,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) { ; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7 ; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1 ; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> +; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> splat (i64 1) ; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V]]) ; CHECK-NEXT: [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> ; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V2]]) diff --git a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll index c0afce55cb3e873..18e9dc41a6b6dd2 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll @@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) { ; CHECK-LABEL: @neg_store_clobber( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <2 x i64> , ptr addrspace(4) [[LOC:%.*]], align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16 ; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1 ; CHECK-NEXT: [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8 ; CHECK-NEXT: ret ptr addrspace(4) [[REF]] @@ -369,7 +369,7 @@ entry: declare void @use.v2(<2 x ptr addrspace(4)>) declare void @use.v4(<4 x ptr addrspace(4)>) - define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) { + define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) { ; CHECK-LABEL: @multini( ; CHECK-NEXT: entry: ; CHECK-NEXT: store ptr addrspace(4) [[VAL:%.*]], ptr [[LOC:%.*]], align 8 diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll index 5f2aef4f4435029..559d63651eb3239 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll @@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) { ; CHECK-LABEL: @neg_store_clobber( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <2 x i64> , ptr addrspace(4) [[LOC:%.*]], align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16 ; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1 ; CHECK-NEXT: [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8 ; CHECK-NEXT: ret ptr addrspace(4) [[REF]] diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll index 2f6640ce9854267..8a9c81912cc9359 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll @@ -48,7 +48,7 @@ define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { define amdgpu_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind { ; CHECK-LABEL: @vector_gep( ; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr> -; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> +; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> splat (i16 16) ; CHECK-NEXT: [[P0:%.*]] = extractelement <4 x ptr> [[P]], i32 0 ; CHECK-NEXT: [[P1:%.*]] = extractelement <4 x ptr> [[P]], i32 1 ; CHECK-NEXT: [[P2:%.*]] = extractelement <4 x ptr> [[P]], i32 2 diff --git a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll index 9e051ae63bfc61e..e6b27cc1c452b02 100644 --- a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll +++ b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll @@ -6,7 +6,7 @@ define <4 x i32> @masked_gather_inferas(ptr addrspace(1) %out, <4 x i64> %index) ; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]] -; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: ret <4 x i32> [[VALUE]] ; entry: @@ -21,7 +21,7 @@ define void @masked_scatter_inferas(ptr addrspace(1) %out, <4 x i64> %index, <4 ; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]], <4 x i32> [[VALUE:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]] -; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> ) +; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll index 9e43aff45530442..397cb2af55c2c8c 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll @@ -16,8 +16,8 @@ define i1 @test(i32 %tmp6) { define <2 x i1> @test_vec(<2 x i32> %tmp6) { ; CHECK-LABEL: @test_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], splat (i32 71) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -12) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp7 = sdiv <2 x i32> %tmp6, diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll index fa63847cab2a2cf..c723e88ad65dd9e 100644 --- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll +++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll @@ -20,8 +20,8 @@ define i16 @test1(i16 %a) { define <2 x i16> @test1_vec(<2 x i16> %a) { ; CHECK-LABEL: @test1_vec( -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i16> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[A]], +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8) +; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[A]], splat (i16 5) ; CHECK-NEXT: [[E:%.*]] = or <2 x i16> [[C]], [[D]] ; CHECK-NEXT: ret <2 x i16> [[E]] ; diff --git a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll index 0038abe5e7b1176..f334e42c85994ab 100644 --- a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll +++ b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll @@ -17,7 +17,7 @@ entry: define <2 x i32> @a_vec(<2 x i32> %b) nounwind { ; CHECK-LABEL: @a_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP0]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll index f1f881b8a1bb008..d3cbd9434323605 100644 --- a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll +++ b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll @@ -4,7 +4,7 @@ define <2 x i8> @foo(<2 x i8> %x) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A:%.*]] = srem <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = srem <2 x i8> [[X:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[A]] ; %A = srem <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll index 63f466a5024c827..89533e7de43d45d 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll @@ -29,7 +29,7 @@ define <4 x i32> @constantMulARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 6) ; entry: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -40,7 +40,7 @@ define <4 x i32> @constantMulSARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulSARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; entry: %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -51,7 +51,7 @@ define <4 x i32> @constantMulUARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulUARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 65535) ; entry: %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -62,7 +62,7 @@ define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex1ARM64( ; CHECK-SAME: <4 x i16> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> splat (i16 2), <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[A]] ; entry: @@ -75,7 +75,7 @@ define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex2ARM64( ; CHECK-SAME: <4 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[B]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll index 122f0ce31f62906..c6695f17b955bfa 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll @@ -17,7 +17,7 @@ define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key @@ -40,7 +40,7 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index e4f8bb133e6ce67..779def76fc58d3a 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1155,7 +1155,7 @@ define <2 x half> @constant_cvt_pkrtz() { ; Test constant values where rtz changes result define <2 x half> @constant_rtz_pkrtz() { ; CHECK-LABEL: @constant_rtz_pkrtz( -; CHECK-NEXT: ret <2 x half> +; CHECK-NEXT: ret <2 x half> splat (half 0xH7BFF) ; %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0) ret <2 x half> %cvt diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll index 5fbbc0ed57750aa..5fc5709ff889715 100644 --- a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll @@ -28,7 +28,7 @@ define <4 x i32> @constantMul() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMul( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 6) ; entry: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -39,7 +39,7 @@ define <4 x i32> @constantMulS() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulS( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; entry: %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -50,7 +50,7 @@ define <4 x i32> @constantMulU() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulU( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 65535) ; entry: %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -61,7 +61,7 @@ define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex1( ; CHECK-SAME: <4 x i16> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> splat (i16 2), <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[A]] ; entry: @@ -74,7 +74,7 @@ define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex2( ; CHECK-SAME: <4 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[B]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll index 5369e9a94c32ed7..0056d872ff9e33d 100644 --- a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll @@ -17,7 +17,7 @@ define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key @@ -40,7 +40,7 @@ define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key diff --git a/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll b/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll index d5fc6d34abca9c5..ffad0ffb54f3e0b 100644 --- a/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll +++ b/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll @@ -282,7 +282,7 @@ entry: define <2 x i1> @vpnot_2(<2 x i1> %vin) { ; CHECK-LABEL: @vpnot_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[VOUT]] ; entry: @@ -295,7 +295,7 @@ entry: define <4 x i1> @vpnot_4(<4 x i1> %vin) { ; CHECK-LABEL: @vpnot_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[VOUT]] ; entry: @@ -308,7 +308,7 @@ entry: define <8 x i1> @vpnot_8(<8 x i1> %vin) { ; CHECK-LABEL: @vpnot_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <8 x i1> [[VOUT]] ; entry: @@ -321,7 +321,7 @@ entry: define <16 x i1> @vpnot_16(<16 x i1> %vin) { ; CHECK-LABEL: @vpnot_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <16 x i1> [[VOUT]] ; entry: @@ -337,7 +337,7 @@ entry: define <2 x i1> @vpnot_narrow_2(<2 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[VOUT]] ; entry: @@ -352,7 +352,7 @@ entry: define <4 x i1> @vpnot_narrow_4(<4 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[VOUT]] ; entry: @@ -367,7 +367,7 @@ entry: define <8 x i1> @vpnot_narrow_8(<8 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <8 x i1> [[VOUT]] ; entry: @@ -382,7 +382,7 @@ entry: define <16 x i1> @vpnot_narrow_16(<16 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <16 x i1> [[VOUT]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll index 49f4729e746e86d..5e2da4299488126 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll @@ -158,7 +158,7 @@ define <4 x i64> @mload_v4i64(ptr %f) { define <4 x i64> @mload_v4i64_cmp(ptr %f, <4 x i64> %src) { ; CHECK-LABEL: @mload_v4i64_cmp( -; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], splat (i64 -1) ; CHECK-NEXT: [[LD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]], <4 x i64> zeroinitializer) ; CHECK-NEXT: ret <4 x i64> [[LD]] ; @@ -270,7 +270,7 @@ define void @mstore_v4f64(ptr %f, <4 x double> %v) { define void @mstore_v4f64_cmp(ptr %f, <4 x i32> %src, <4 x double> %v) { ; CHECK-LABEL: @mstore_v4f64_cmp( -; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], splat (i32 -1) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll index 04bba79aada0ab0..76e2f9af2ed1de0 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll @@ -261,7 +261,7 @@ define i32 @zero_x86_avx2_pmovmskb() { define i32 @fold_x86_mmx_pmovmskb() { ; CHECK-LABEL: @fold_x86_mmx_pmovmskb( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> splat (i64 18084223940296448)) ; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = bitcast <8 x i8> to <1 x i64> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll index 80d3c42341f8206..394754b7b54428c 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll @@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <4 x i64> [[TMP7]] ; @@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <8 x i64> [[TMP7]] ; @@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: ret <2 x i64> [[TMP9]] ; @@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP10]] @@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP10]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll index 5504fa15dc30266..0b0b2349bb061dd 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll @@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <4 x i64> [[TMP7]] ; @@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <8 x i64> [[TMP7]] ; @@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: ret <2 x i64> [[TMP9]] ; @@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP10]] @@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP10]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll index 74d58bae742746d..43e5f0cbefc9bee 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll @@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; @@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll index 6ddcb856692b0fe..15439ebfa2151df 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll @@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; @@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll index 947c7d38d26ee0b..4d4d619ea7cb41f 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll @@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> ) @@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> %a0) @@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> ) @@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> , <16 x i16> %a0) @@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> ) @@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> , <32 x i16> %a0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll index 40ad11a69939156..3b14185edde71c8 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll @@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> ) @@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> , <8 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> splat (i16 1), <8 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> , <8 x i16> %a0) @@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> splat (i16 1)) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> ) @@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> , <16 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> splat (i16 1), <16 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> , <16 x i16> %a0) @@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> splat (i16 1)) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> ) @@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> , <32 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> splat (i16 1), <32 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> , <32 x i16> %a0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll index 951ed6d3d108ab4..2376d18748ee131 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll @@ -144,7 +144,7 @@ define <8 x i32> @vpternlog_d_v256_imm14(<8 x i32> %v0, <8 x i32> %v1, <8 x i32> define <8 x i64> @vpternlog_q_v512_imm15(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm15( -; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V0:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V0:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <8 x i64> [[R]] ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 15) @@ -468,7 +468,7 @@ define <8 x i32> @vpternlog_d_v256_imm50(<8 x i32> %v0, <8 x i32> %v1, <8 x i32> define <8 x i64> @vpternlog_q_v512_imm51(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm51( -; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V1:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V1:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <8 x i64> [[R]] ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 51) @@ -774,7 +774,7 @@ define <16 x i32> @vpternlog_d_v512_imm84(<16 x i32> %v0, <16 x i32> %v1, <16 x define <2 x i64> @vpternlog_q_v128_imm85(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v128_imm85( -; CHECK-NEXT: [[R:%.*]] = xor <2 x i64> [[V2:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i64> [[V2:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[R]] ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, i32 85) @@ -2301,7 +2301,7 @@ define <8 x i32> @vpternlog_d_v256_imm254(<8 x i32> %v0, <8 x i32> %v1, <8 x i32 define <8 x i64> @vpternlog_q_v512_imm255(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm255( -; CHECK-NEXT: ret <8 x i64> +; CHECK-NEXT: ret <8 x i64> splat (i64 -1) ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 255) ret <8 x i64> %r @@ -2797,7 +2797,7 @@ define <16 x i32> @vpternlog_d_constv512_imm60() { define <2 x i64> @vpternlog_q_constv128_imm61() { ; CHECK-LABEL: @vpternlog_q_constv128_imm61( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -5) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 61) ret <2 x i64> %r @@ -3037,7 +3037,7 @@ define <16 x i32> @vpternlog_d_constv512_imm90() { define <2 x i64> @vpternlog_q_constv128_imm91() { ; CHECK-LABEL: @vpternlog_q_constv128_imm91( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 91) ret <2 x i64> %r @@ -3085,7 +3085,7 @@ define <16 x i32> @vpternlog_d_constv512_imm96() { define <2 x i64> @vpternlog_q_constv128_imm97() { ; CHECK-LABEL: @vpternlog_q_constv128_imm97( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -14) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 97) ret <2 x i64> %r @@ -3133,7 +3133,7 @@ define <16 x i32> @vpternlog_d_constv512_imm102() { define <2 x i64> @vpternlog_q_constv128_imm103() { ; CHECK-LABEL: @vpternlog_q_constv128_imm103( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 103) ret <2 x i64> %r @@ -3325,7 +3325,7 @@ define <16 x i32> @vpternlog_d_constv512_imm126() { define <2 x i64> @vpternlog_q_constv128_imm127() { ; CHECK-LABEL: @vpternlog_q_constv128_imm127( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 127) ret <2 x i64> %r @@ -3373,7 +3373,7 @@ define <16 x i32> @vpternlog_d_constv512_imm132() { define <2 x i64> @vpternlog_q_constv128_imm133() { ; CHECK-LABEL: @vpternlog_q_constv128_imm133( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -14) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 133) ret <2 x i64> %r @@ -3565,7 +3565,7 @@ define <16 x i32> @vpternlog_d_constv512_imm156() { define <2 x i64> @vpternlog_q_constv128_imm157() { ; CHECK-LABEL: @vpternlog_q_constv128_imm157( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 157) ret <2 x i64> %r @@ -3837,7 +3837,7 @@ define <4 x i32> @vpternlog_d_constv128_imm190() { define <4 x i64> @vpternlog_q_constv256_imm191() { ; CHECK-LABEL: @vpternlog_q_constv256_imm191( -; CHECK-NEXT: ret <4 x i64> +; CHECK-NEXT: ret <4 x i64> splat (i64 -1) ; %r = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> , <4 x i64> , <4 x i64> , i32 191) ret <4 x i64> %r @@ -4093,7 +4093,7 @@ define <16 x i32> @vpternlog_d_constv512_imm222() { define <2 x i64> @vpternlog_q_constv128_imm223() { ; CHECK-LABEL: @vpternlog_q_constv128_imm223( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 223) ret <2 x i64> %r @@ -4285,7 +4285,7 @@ define <16 x i32> @vpternlog_d_constv512_imm246() { define <2 x i64> @vpternlog_q_constv128_imm247() { ; CHECK-LABEL: @vpternlog_q_constv128_imm247( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 247) ret <2 x i64> %r @@ -4349,7 +4349,7 @@ define <8 x i32> @vpternlog_d_constv256_imm254() { define <8 x i64> @vpternlog_q_constv512_imm255() { ; CHECK-LABEL: @vpternlog_q_constv512_imm255( -; CHECK-NEXT: ret <8 x i64> +; CHECK-NEXT: ret <8 x i64> splat (i64 -1) ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> , <8 x i64> , <8 x i64> , i32 255) ret <8 x i64> %r diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll index 5b6ce56b4b3265f..5a74511139c117e 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll @@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) @@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) @@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) @@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) @@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) @@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) @@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) @@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) @@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15) @@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64) @@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15) @@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64) @@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15) @@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64) @@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15) @@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64) @@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15) @@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64) @@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) @@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) @@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) @@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) @@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) @@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) @@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15) @@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15) @@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15) @@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) @@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) @@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) @@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) @@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) @@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) @@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_pslli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15) @@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_pslli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15) @@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_pslli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15) @@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) @@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) @@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) @@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) @@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) @@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) @@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) @@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) @@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) @@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) @@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) @@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) @@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) @@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) @@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) @@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) @@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) @@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) @@ -2911,7 +2911,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index b1e5fa4f9e1c94f..d38e22a4e3851be 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) @@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) @@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) @@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) @@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) @@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) @@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) @@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) @@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15) @@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64) @@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15) @@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64) @@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15) @@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64) @@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15) @@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64) @@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15) @@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64) @@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) @@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) @@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) @@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) @@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) @@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) @@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15) @@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15) @@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15) @@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) @@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) @@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) @@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) @@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) @@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) @@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_pslli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15) @@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_pslli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15) @@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_pslli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15) @@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) @@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) @@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) @@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) @@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) @@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) @@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) @@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) @@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) @@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) @@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) @@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) @@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) @@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) @@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) @@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) @@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) @@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) @@ -2951,7 +2951,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll index ec010b02c86a028..b95f00a1e9df038 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll @@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_strue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 @@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_utrue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll index 644cbc8824f3b31..81db68145638c12 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll @@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_strue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 @@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_utrue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll index 63287e59f66346c..7037647d116ba2a 100644 --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -385,7 +385,7 @@ define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) { define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) { ; CHECK-LABEL: @shifty_abs_commute2( -; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[Y]], i1 false) ; CHECK-NEXT: ret <2 x i8> [[ABS]] ; diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index f7c639f1d8e6ba2..022d60d2f501bf6 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -178,9 +178,9 @@ define i32 @abs_trailing_zeros_negative(i32 %x) { define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_trailing_zeros_negative_vec( -; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 -2) ; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[AND]], i1 false) -; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[ABS]], +; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[ABS]], splat (i32 -4) ; CHECK-NEXT: ret <4 x i32> [[AND2]] ; %and = and <4 x i32> %x, @@ -207,7 +207,7 @@ define i32 @abs_signbits(i30 %x) { define <4 x i32> @abs_signbits_vec(<4 x i30> %x) { ; CHECK-LABEL: @abs_signbits_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i30> @llvm.abs.v4i30(<4 x i30> [[X:%.*]], i1 false) -; CHECK-NEXT: [[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], splat (i30 1) ; CHECK-NEXT: [[ADD:%.*]] = zext <4 x i30> [[NARROW]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[ADD]] ; @@ -317,7 +317,7 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], splat (i82 1) ; CHECK-NEXT: ret <3 x i82> [[LSHR]] ; %lshr = lshr <3 x i82> %x, @@ -497,7 +497,7 @@ define i32 @demand_low_bit(i32 %x) { define <3 x i82> @demand_low_bit_int_min_is_poison(<3 x i82> %x) { ; CHECK-LABEL: @demand_low_bit_int_min_is_poison( -; CHECK-NEXT: [[R:%.*]] = shl <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <3 x i82> [[X:%.*]], splat (i82 81) ; CHECK-NEXT: ret <3 x i82> [[R]] ; %a = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %x, i1 true) @@ -530,9 +530,9 @@ define i32 @srem_by_2_int_min_is_poison(i32 %x) { define <3 x i82> @srem_by_2(<3 x i82> %x, ptr %p) { ; CHECK-LABEL: @srem_by_2( -; CHECK-NEXT: [[S:%.*]] = srem <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = srem <3 x i82> [[X:%.*]], splat (i82 2) ; CHECK-NEXT: store <3 x i82> [[S]], ptr [[P:%.*]], align 32 -; CHECK-NEXT: [[R:%.*]] = and <3 x i82> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <3 x i82> [[X]], splat (i82 1) ; CHECK-NEXT: ret <3 x i82> [[R]] ; %s = srem <3 x i82> %x, diff --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll index b72f051a0b799b3..cb23763249d6365 100644 --- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll +++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll @@ -78,8 +78,8 @@ define i32 @dec_mask_multiuse_neg_i32(i32 %X) { define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_neg_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[DEC]] ; @@ -91,8 +91,8 @@ define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) { define <2 x i32> @dec_mask_neg_v2i32_poison(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_neg_v2i32_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[DEC]] ; @@ -106,7 +106,7 @@ define <2 x i32> @dec_mask_multiuse_neg_multiuse_v2i32(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_multiuse_neg_multiuse_v2i32( ; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[X]], [[NEG]] -; CHECK-NEXT: [[DEC:%.*]] = add <2 x i32> [[MASK]], +; CHECK-NEXT: [[DEC:%.*]] = add <2 x i32> [[MASK]], splat (i32 -1) ; CHECK-NEXT: call void @usev(<2 x i32> [[NEG]]) ; CHECK-NEXT: call void @usev(<2 x i32> [[MASK]]) ; CHECK-NEXT: ret <2 x i32> [[DEC]] diff --git a/llvm/test/Transforms/InstCombine/add-mask.ll b/llvm/test/Transforms/InstCombine/add-mask.ll index 00c14e351642e5e..6cbb3442d34e092 100644 --- a/llvm/test/Transforms/InstCombine/add-mask.ll +++ b/llvm/test/Transforms/InstCombine/add-mask.ll @@ -32,7 +32,7 @@ define i32 @add_mask_sign_commute_i32(i32 %x) { define <2 x i32> @add_mask_sign_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @add_mask_sign_v2i32( ; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = ashr <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll index 82224496d90ec75..84462f9a7f592b8 100644 --- a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll +++ b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll @@ -38,7 +38,7 @@ define i32 @add-shl-sdiv-scalar2(i32 %x) { define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat0( -; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; %sd = sdiv <3 x i8> %x, @@ -49,7 +49,7 @@ define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) { define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat1( -; CHECK-NEXT: [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], splat (i32 1073741824) ; CHECK-NEXT: ret <4 x i32> [[RZ]] ; %sd = sdiv <4 x i32> %x, @@ -60,7 +60,7 @@ define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) { define <2 x i64> @add-shl-sdiv-splat2(<2 x i64> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat2( -; CHECK-NEXT: [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[RZ]] ; %sd = sdiv <2 x i64> %x, @@ -134,9 +134,9 @@ define i32 @add-shl-sdiv-i32-use3(i32 %x) { declare void @use3xi8(<3 x i8>) define <3 x i8> @add-shl-sdiv-use4(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-use4( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: call void @use3xi8(<3 x i8> [[SD]]) -; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X]], +; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; %sd = sdiv <3 x i8> %x, @@ -187,8 +187,8 @@ define i32 @add-shl-sdiv-negative2(i32 %x) { define <3 x i8> @add-shl-sdiv-negative3(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-negative3( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -5) +; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], splat (i8 2) ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; @@ -222,7 +222,7 @@ define <3 x i8> @add-shl-sdiv-3xi8-undef0(<3 x i8> %x) { define <3 x i8> @add-shl-sdiv-3xi8-undef1(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-3xi8-undef1( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] @@ -250,7 +250,7 @@ define <2 x i64> @add-shl-sdiv-nonsplat0(<2 x i64> %x) { define <3 x i8> @add-shl-sdiv-nonsplat1(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-nonsplat1( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll index f1afcaf5f85d2ae..fae1365dfa85333 100644 --- a/llvm/test/Transforms/InstCombine/add-sitofp.ll +++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll @@ -101,8 +101,8 @@ define float @test_3(i32 %a, i32 %b) { define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @test_4( -; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823) +; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823) ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]] ; CHECK-NEXT: [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] @@ -120,8 +120,8 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @test_4_neg( -; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823) +; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823) ; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to <4 x float> ; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]] diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 417c3a950d7805e..4b1159cf07e7105 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -17,7 +17,7 @@ define i32 @select_0_or_1_from_bool(i1 %x) { define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) { ; CHECK-LABEL: @select_0_or_1_from_bool_vec( -; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i1> [[NOT_X]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; @@ -62,8 +62,8 @@ define i32 @flip_and_mask(i32 %x) { define <2 x i8> @flip_and_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @flip_and_mask_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[INC:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[INC:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[INC]] ; %shl = shl <2 x i8> %x, @@ -291,7 +291,7 @@ define i1 @test11(i8 %A) { define <2 x i1> @test11vec(<2 x i8> %a) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i8> %a, @@ -312,7 +312,7 @@ define i8 @reassoc_shl1(i8 %x, i8 %y) { define <2 x i8> @reassoc_shl1_commute1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @reassoc_shl1_commute1( -; CHECK-NEXT: [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[REASS_ADD]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -521,7 +521,7 @@ define i32 @test19(i1 %C) { define <2 x i32> @test19vec(i1 %C) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -554,7 +554,7 @@ define i32 @xor_sign_bit(i32 %x) { define <2 x i32> @xor_sign_bit_vec_splat(<2 x i32> %x) { ; CHECK-LABEL: @xor_sign_bit_vec_splat( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -2147483606) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; %xor = xor <2 x i32> %x, @@ -684,7 +684,7 @@ define i1 @test21(i32 %x) { define <2 x i1> @test21vec(<2 x i32> %x) { ; CHECK-LABEL: @test21vec( -; CHECK-NEXT: [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 119) ; CHECK-NEXT: ret <2 x i1> [[Y]] ; %t = add <2 x i32> %x, @@ -882,8 +882,8 @@ define i8 @masked_add(i8 %x) { define <2 x i8> @masked_add_splat(<2 x i8> %x) { ; CHECK-LABEL: @masked_add_splat( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -64) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], splat (i8 64) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %and = and <2 x i8> %x, ; 0xc0 @@ -1008,7 +1008,7 @@ define i64 @test41_multiuse_constants_cancel(i32 %a) { define <2 x i64> @test41vec(<2 x i32> %a) { ; CHECK-LABEL: @test41vec( -; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 15) ; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; @@ -1020,10 +1020,10 @@ define <2 x i64> @test41vec(<2 x i32> %a) { define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) { ; CHECK-LABEL: @test41vec_and_multiuse( -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64> -; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], -; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], +; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], splat (i64 1) +; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[EXTRAUSE]] ; %add = add nuw <2 x i32> %a, @@ -1045,7 +1045,7 @@ define i32 @test42(i1 %C) { define <2 x i32> @test42vec(i1 %C) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1092,7 +1092,7 @@ define <2 x i32> @test43vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1123), [[ENTRY:%.*]] ], [ splat (i32 133), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1230,7 +1230,7 @@ define i32 @test44_non_matching(i32 %A) { define <2 x i32> @test44_vec(<2 x i32> %A) { ; CHECK-LABEL: @test44_vec( -; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = or <2 x i32> %A, @@ -1240,8 +1240,8 @@ define <2 x i32> @test44_vec(<2 x i32> %A) { define <2 x i32> @test44_vec_non_matching(<2 x i32> %A) { ; CHECK-LABEL: @test44_vec_non_matching( -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 123) +; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], splat (i32 -321) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = or <2 x i32> %A, @@ -1301,7 +1301,7 @@ define i5 @and_add(i1 %x, i1 %y) { define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @ashr_add_commute( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[TMP3]] @@ -1994,7 +1994,7 @@ define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) { define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_add_common_factor_commute2( -; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 1) ; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2061,8 +2061,8 @@ define i8 @not_mul(i8 %x) { define <2 x i8> @not_mul_commute(<2 x i8> %p) { ; CHECK-LABEL: @not_mul_commute( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[X]], -; CHECK-NEXT: [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[X]], splat (i8 43) +; CHECK-NEXT: [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[PLUSX]] ; %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization @@ -2134,7 +2134,7 @@ define i8 @full_ashr_inc(i8 %x) { define <2 x i6> @full_ashr_inc_vec(<2 x i6> %x) { ; CHECK-LABEL: @full_ashr_inc_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], splat (i6 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -2520,7 +2520,7 @@ define i8 @mul_negpow2(i8 %x, i8 %y) { define <2 x i8> @mul_negpow2_commute_vec(<2 x i8> %x, <2 x i8> %p) { ; CHECK-LABEL: @mul_negpow2_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[A:%.*]] = sub <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2704,7 +2704,7 @@ define i5 @zext_zext_not(i3 noundef %x) { define <2 x i5> @zext_zext_not_commute(<2 x i3> noundef %x) { ; CHECK-LABEL: @zext_zext_not_commute( -; CHECK-NEXT: ret <2 x i5> +; CHECK-NEXT: ret <2 x i5> splat (i5 7) ; %zx = zext <2 x i3> %x to <2 x i5> %notx = xor <2 x i3> %x, @@ -2886,7 +2886,7 @@ define i8 @floor_sdiv_by_2_wrong_cast(i8 %x) { define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) { ; CHECK-LABEL: @floor_sdiv_vec_commute( -; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %d = sdiv <2 x i32> %x, @@ -3190,7 +3190,7 @@ define i32 @dec_zext_add_nonzero(i8 %x) { define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) ; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; @@ -3206,9 +3206,9 @@ define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_undef0(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_undef0( ; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[O]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[O]], splat (i8 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], +; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %o = or <2 x i8> %x, @@ -3233,7 +3233,7 @@ define <2 x i32> @dec_zext_add_nonzero_poison0(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison1( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) ; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; @@ -3246,8 +3246,8 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) +; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> ; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], ; CHECK-NEXT: ret <2 x i32> [[C]] diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll index 77f7fc7b35cd44a..0e97deb4d98ade4 100644 --- a/llvm/test/Transforms/InstCombine/add4.ll +++ b/llvm/test/Transforms/InstCombine/add4.ll @@ -19,7 +19,7 @@ define i64 @match_unsigned(i64 %x) { define <2 x i64> @match_unsigned_vector(<2 x i64> %x) { ; CHECK-LABEL: @match_unsigned_vector( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], splat (i64 19136) ; CHECK-NEXT: ret <2 x i64> [[UREM]] ; bb: @@ -63,7 +63,7 @@ define i64 @match_signed(i64 %x) { define <2 x i64> @match_signed_vector(<2 x i64> %x) { ; CHECK-LABEL: @match_signed_vector( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 172224) ; CHECK-NEXT: ret <2 x i64> [[SREM1]] ; bb: diff --git a/llvm/test/Transforms/InstCombine/add_or_sub.ll b/llvm/test/Transforms/InstCombine/add_or_sub.ll index ef44f036b71fa42..7d005d3cb16f146 100644 --- a/llvm/test/Transforms/InstCombine/add_or_sub.ll +++ b/llvm/test/Transforms/InstCombine/add_or_sub.ll @@ -58,7 +58,7 @@ define i64 @add_or_sub_comb_i64_commuted4(i64 %p) { define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) { ; CHECK-LABEL: @add_or_sub_comb_i32vec( ; CHECK-NEXT: [[X:%.*]] = mul <3 x i32> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[ADD:%.*]] = and <3 x i32> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i32> [[ADD]] ; @@ -72,7 +72,7 @@ define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) { define <4 x i16> @add_or_sub_comb_i32vec_poison(<4 x i16> %p) { ; CHECK-LABEL: @add_or_sub_comb_i32vec_poison( ; CHECK-NEXT: [[X:%.*]] = mul <4 x i16> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[X]], splat (i16 -1) ; CHECK-NEXT: [[ADD:%.*]] = and <4 x i16> [[TMP1]], [[X]] ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll index da92ca11a9e0d0c..b38466f6cf0ec2e 100644 --- a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll +++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll @@ -31,7 +31,7 @@ define i32 @add_const_add_const_extrause(i32 %arg) { define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_add_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -41,9 +41,9 @@ define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) { define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -89,7 +89,7 @@ define i32 @add_const_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_sub_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -99,9 +99,9 @@ define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -260,7 +260,7 @@ define i32 @add_const_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_const_sub( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -270,9 +270,9 @@ define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -318,7 +318,7 @@ define i32 @sub_const_add_const_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_add_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -328,9 +328,9 @@ define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -376,7 +376,7 @@ define i32 @sub_const_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_sub_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -386,9 +386,9 @@ define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -434,7 +434,7 @@ define i32 @sub_const_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_const_sub( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -444,9 +444,9 @@ define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -492,7 +492,7 @@ define i32 @const_sub_add_const_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_add_const( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -502,9 +502,9 @@ define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -550,7 +550,7 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_sub_const( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -560,9 +560,9 @@ define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -609,7 +609,7 @@ define i32 @const_sub_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_const_sub( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -619,9 +619,9 @@ define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll index 76fc7a07be6bd61..0b4720c1a209fb1 100644 --- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll +++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll @@ -115,7 +115,7 @@ define i32 @umax3(i32 %n) { define <2 x i32> @umax3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax3_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ugt <2 x i32> %n, @@ -139,7 +139,7 @@ define i32 @umin3(i32 %n) { define <2 x i32> @umin3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin3_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 6)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ult <2 x i32> %n, @@ -211,7 +211,7 @@ define i32 @umax4(i32 %n) { define <2 x i32> @umax4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax4_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp uge <2 x i32> %n, @@ -235,7 +235,7 @@ define i32 @umin4(i32 %n) { define <2 x i32> @umin4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin4_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 9)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ule <2 x i32> %n, @@ -305,7 +305,7 @@ define i64 @umax_sext(i32 %a) { define <2 x i64> @umax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MAX:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; @@ -329,7 +329,7 @@ define i64 @umin_sext(i32 %a) { define <2 x i64> @umin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -353,7 +353,7 @@ define i64 @umax_sext2(i32 %a) { define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext2_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -377,7 +377,7 @@ define i64 @umin_sext2(i32 %a) { define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext2_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -401,7 +401,7 @@ define i64 @umax_zext(i32 %a) { define <2 x i64> @umax_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_zext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; @@ -425,7 +425,7 @@ define i64 @umin_zext(i32 %a) { define <2 x i64> @umin_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_zext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll index 9f8d3e317accc73..c8f93410f9e9c8f 100644 --- a/llvm/test/Transforms/InstCombine/and-compare.ll +++ b/llvm/test/Transforms/InstCombine/and-compare.ll @@ -23,7 +23,7 @@ define i1 @test1(i32 %a, i32 %b) { define <2 x i1> @test1vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test1vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65280) ; CHECK-NEXT: [[TMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP]] ; @@ -46,7 +46,7 @@ define i1 @test2(i64 %A) { define <2 x i1> @test2vec(<2 x i64> %A) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -68,7 +68,7 @@ define i1 @test3(i64 %A) { define <2 x i1> @test3vec(<2 x i64> %A) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index c163802fcc935cb..c03c9d69051edd3 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -4644,7 +4644,7 @@ define i1 @clang_builtin_isnormal_inf_check(half %x) { define <2 x i1> @clang_builtin_isnormal_inf_check_vector(<2 x half> %x) { ; CHECK-LABEL: @clang_builtin_isnormal_inf_check_vector( ; CHECK-NEXT: [[FABS_X:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], +; CHECK-NEXT: [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[AND]] ; %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) diff --git a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll index de5de37fe2df64a..ee76e0eb1051090 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll @@ -139,7 +139,7 @@ define i1 @ne_multi_c2(i8 %x, i8 %y) { define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -152,7 +152,7 @@ define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_equal_5(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_5( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -179,7 +179,7 @@ define <2 x i1> @eq_vector_equal_minus_1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_minus_7( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 6) ; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -193,8 +193,8 @@ define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_unequal1( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[SUB:%.*]] = add <2 x i8> [[X]], splat (i8 -5) +; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 2) ; CHECK-NEXT: [[C2:%.*]] = icmp ugt <2 x i8> [[SUB]], [[Y]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[C1]], [[C2]] ; CHECK-NEXT: ret <2 x i1> [[OR]] @@ -209,8 +209,8 @@ define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_unequal2( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 7) +; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -3) ; CHECK-NEXT: [[C2:%.*]] = icmp ule <2 x i8> [[ADD]], [[Y]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[C1]], [[C2]] ; CHECK-NEXT: ret <2 x i1> [[AND]] @@ -228,7 +228,7 @@ define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_icmp( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -242,7 +242,7 @@ define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_poison_add(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_add( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2b5d430e295757e..fffe1f8426690b7 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -335,8 +335,8 @@ define i1 @and_ne_with_diff_one_signed_logical(i64 %x) { define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -33) +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 65) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cmp1 = icmp eq <2 x i32> %x, @@ -347,8 +347,8 @@ define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @and_ne_with_diff_one_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -41) +; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[AND]] ; %cmp1 = icmp ne <2 x i32> %x, @@ -984,7 +984,7 @@ define <2 x i1> @substitute_constant_or_ne_slt_swap_vec_logical(<2 x i8> %x, <2 ; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec_logical( ; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C1]], <2 x i1> , <2 x i1> [[C2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C1]], <2 x i1> splat (i1 true), <2 x i1> [[C2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c1 = icmp ne <2 x i8> %x, @@ -2154,7 +2154,7 @@ define i1 @samesign(i32 %x, i32 %y) { define <2 x i1> @samesign_different_sign_bittest1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @samesign_different_sign_bittest1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %a = and <2 x i32> %x, %y @@ -2509,7 +2509,7 @@ define i1 @samesign_inverted_wrong_cmp(i32 %x, i32 %y) { define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_eq_m1_and_eq_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp eq <2 x i8> %x, @@ -2521,7 +2521,7 @@ define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_eq_m1_and_eq_poison_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_eq_m1_and_eq_poison_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp eq <2 x i8> %x, @@ -2571,7 +2571,7 @@ define <2 x i1> @icmp_eq_m1_or_eq_m1_fail(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_ne_m1_or_ne_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_ne_m1_or_ne_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp ne <2 x i8> %x, @@ -2680,7 +2680,7 @@ define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2(<2 x i64> %x) { define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2_fail(<2 x i64> %x) { ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i64x2_fail( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], splat (i64 1) ; CHECK-NEXT: [[E:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[E]] ; @@ -2794,7 +2794,7 @@ define i64 @icmp_slt_0_and_icmp_sge_neg1_i64_fail(i64 %x) { define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) { ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg1_i32x2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -2807,7 +2807,7 @@ define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) { define <2 x i32> @icmp_slt_0_and_icmp_sge_neg2_i32x2(<2 x i32> %x) { ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg2_i32x2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -3) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -3085,7 +3085,7 @@ entry: define <4 x i1> @logical_and_icmps_vec1(<4 x i32> %a, <4 x i1> %other_cond) { ; CHECK-LABEL: @logical_and_icmps_vec1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], splat (i32 10086) ; CHECK-NEXT: [[RET:%.*]] = select <4 x i1> [[OTHER_COND:%.*]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/and-or-not.ll b/llvm/test/Transforms/InstCombine/and-or-not.ll index 5e6c480df5d103a..e06e1b1f7255fc5 100644 --- a/llvm/test/Transforms/InstCombine/and-or-not.ll +++ b/llvm/test/Transforms/InstCombine/and-or-not.ll @@ -682,7 +682,7 @@ define i4 @simplify_or_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q) { define <2 x i4> @simplify_or_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p) { ; CHECK-LABEL: @simplify_or_common_op_commute3( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization %xy = and <2 x i4> %y, %x diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll index fee055a2e124517..2f3e0c2e26ca82e 100644 --- a/llvm/test/Transforms/InstCombine/and-or.ll +++ b/llvm/test/Transforms/InstCombine/and-or.ll @@ -60,7 +60,7 @@ define i32 @or_and_not_constant_commute3(i32 %a, i32 %b) { define <2 x i7> @or_and_not_constant_commute0_splat(<2 x i7> %a, <2 x i7> %b) { ; CHECK-LABEL: @or_and_not_constant_commute0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i7> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i7> [[A:%.*]], splat (i7 42) ; CHECK-NEXT: [[T3:%.*]] = or <2 x i7> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i7> [[T3]] ; @@ -118,13 +118,13 @@ define i8 @or_and_or_commute1(i8 %x) { define <2 x i8> @or_and_or_commute1_splat(<2 x i8> %x) { ; CHECK-LABEL: @or_and_or_commute1_splat( -; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 16) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 59) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 123) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %xn = or <2 x i8> %x, @@ -162,14 +162,14 @@ define i8 @or_and_or_commute2(i8 %x, i8 %y) { define <2 x i8> @or_and_or_commute2_splat(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @or_and_or_commute2_splat( -; CHECK-NEXT: [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6) ; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[N]], [[X:%.*]] ; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 -69) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 -5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %n = lshr <2 x i8> %y, @@ -234,16 +234,16 @@ define i8 @or_and2_or2(i8 %x) { define <2 x i8> @or_and2_or2_splat(<2 x i8> %x) { ; CHECK-LABEL: @or_and2_or2_splat( -; CHECK-NEXT: [[O1:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O1:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[O1]]) -; CHECK-NEXT: [[O2:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[O2:%.*]] = or <2 x i8> [[X]], splat (i8 2) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[O2]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[O1]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[O1]], splat (i8 -71) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[O2]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[O2]], splat (i8 66) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[BITFIELD:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], +; CHECK-NEXT: [[BITFIELD:%.*]] = and <2 x i8> [[X]], splat (i8 -8) +; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %o1 = or <2 x i8> %x, @@ -277,8 +277,8 @@ define i8 @and_or_hoist_mask(i8 %a, i8 %b) { define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @and_xor_hoist_mask_vec_splat( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) +; CHECK-NEXT: [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 3) ; CHECK-NEXT: [[AND:%.*]] = xor <2 x i8> [[SH]], [[B_MASKED]] ; CHECK-NEXT: ret <2 x i8> [[AND]] ; @@ -305,9 +305,9 @@ define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) { define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat( -; CHECK-NEXT: [[C:%.*]] = mul <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C_MASKED:%.*]] = and <2 x i8> [[C]], +; CHECK-NEXT: [[C:%.*]] = mul <2 x i8> [[B:%.*]], splat (i8 3) +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) +; CHECK-NEXT: [[C_MASKED:%.*]] = and <2 x i8> [[C]], splat (i8 3) ; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]] ; CHECK-NEXT: ret <2 x i8> [[AND]] ; diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index 3dbf9af7e193435..5a0890e918ef0f0 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -66,7 +66,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { ; CHECK-SAME: (<2 x i32> [[PA:%.*]], <2 x i32> [[PB:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> , [[PA]] ; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> , [[PB]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -83,7 +83,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) { ; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_constant ; CHECK-SAME: (<4 x i32> [[A:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -740,9 +740,9 @@ define i32 @not_and_and_not(i32 %a0, i32 %b, i32 %c) { define <4 x i64> @not_and_and_not_4i64(<4 x i64> %a0, <4 x i64> %b, <4 x i64> %c) { ; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_4i64 ; CHECK-SAME: (<4 x i64> [[A0:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> , [[A0]] +; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> splat (i64 42), [[A0]] ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i64> [[B]], [[C]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <4 x i64> [[A]], [[TMP2]] ; CHECK-NEXT: ret <4 x i64> [[AND2]] ; @@ -835,9 +835,9 @@ define i32 @not_or_or_not(i32 %a0, i32 %b, i32 %c) { define <2 x i6> @not_or_or_not_2i6(<2 x i6> %a0, <2 x i6> %b, <2 x i6> %c) { ; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_2i6 ; CHECK-SAME: (<2 x i6> [[A0:%.*]], <2 x i6> [[B:%.*]], <2 x i6> [[C:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> , [[A0]] +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> splat (i6 3), [[A0]] ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i6> [[B]], [[C]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i6> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i6> [[TMP1]], splat (i6 -1) ; CHECK-NEXT: [[OR2:%.*]] = or <2 x i6> [[A]], [[TMP2]] ; CHECK-NEXT: ret <2 x i6> [[OR2]] ; @@ -4013,7 +4013,7 @@ define i4 @and_orn_xor(i4 %a, i4 %b) { define <2 x i4> @and_orn_xor_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute1 ; CHECK-SAME: (<2 x i4> [[A:%.*]], <2 x i4> [[B:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[B]], [[TMP1]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -4392,8 +4392,8 @@ define i32 @canonicalize_logic_first_or0_nswnuw(i32 %x) { define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4404,8 +4404,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nsw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4416,8 +4416,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_or_vector0_nswnuw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nswnuw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw nuw <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4538,8 +4538,8 @@ define i8 @canonicalize_logic_first_and0_nswnuw(i8 %x) { define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> , %x @@ -4550,8 +4550,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) { define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nsw ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add nsw <2 x i8> , %x @@ -4562,8 +4562,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) { define <2 x i8> @canonicalize_logic_first_and_vector0_nswnuw(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nswnuw ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add nsw nuw <2 x i8> , %x @@ -4589,7 +4589,7 @@ define <2 x i8> @canonicalize_logic_first_and_vector1(<2 x i8> %x) { define <2 x i32> @canonicalize_logic_first_and_vector2(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector2 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X]], splat (i32 612368384) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -4675,8 +4675,8 @@ define i8 @canonicalize_logic_first_xor_0_nswnuw(i8 %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> , %x ; <0xFF800000, 0xFF800000> @@ -4687,8 +4687,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nsw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw <2 x i32> , %x ; <0xFF800000, 0xFF800000> @@ -4699,8 +4699,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0_nswnuw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nswnuw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw nuw <2 x i32> , %x ; <0xFF800000, 0xFF800000> diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index 466718c80230078..f0fd0e262a795f6 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -322,7 +322,7 @@ define i1 @test18(i32 %A) { define <2 x i1> @test18_vec(<2 x i32> %A) { ; CHECK-LABEL: @test18_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = and <2 x i32> %A, @@ -342,7 +342,7 @@ define i1 @test18a(i8 %A) { define <2 x i1> @test18a_vec(<2 x i8> %A) { ; CHECK-LABEL: @test18a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = and <2 x i8> %A, @@ -396,7 +396,7 @@ define i1 @test23_logical(i32 %A) { define <2 x i1> @test23vec(<2 x i32> %A) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sgt <2 x i32> %A, @@ -455,8 +455,8 @@ define i1 @test25_logical(i32 %A) { define <2 x i1> @test25vec(<2 x i32> %A) { ; CHECK-LABEL: @test25vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -50) +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 50) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sge <2 x i32> %A, @@ -504,9 +504,9 @@ define i32 @ashr_lowmask_use(i32 %x) { define <2 x i8> @ashr_lowmask_use_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @ashr_lowmask_use_splat( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[X]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = ashr <2 x i8> %x, @@ -596,7 +596,7 @@ define i32 @test31(i1 %X) { define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @and_demanded_bits_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %y = add <2 x i32> %x, @@ -658,7 +658,7 @@ define i32 @test33b(i32 %b) { define <2 x i32> @test33vec(<2 x i32> %b) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T13]] ; %t4.mask = and <2 x i32> %b, @@ -670,7 +670,7 @@ define <2 x i32> @test33vec(<2 x i32> %b) { define <2 x i32> @test33vecb(<2 x i32> %b) { ; CHECK-LABEL: @test33vecb( -; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T13]] ; %t4.mask = and <2 x i32> %b, @@ -716,7 +716,7 @@ define i64 @test35(i32 %X) { define <2 x i64> @test35_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test35_uniform( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -741,8 +741,8 @@ define i64 @test36(i32 %X) { define <2 x i64> @test36_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test36_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -780,8 +780,8 @@ define i64 @test37(i32 %X) { define <2 x i64> @test37_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test37_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -988,7 +988,7 @@ define i32 @test40(i1 %C) { define <2 x i32> @test40vec(i1 %C) { ; CHECK-LABEL: @test40vec( -; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 104), <2 x i32> splat (i32 10) ; CHECK-NEXT: ret <2 x i32> [[A]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1035,7 +1035,7 @@ define <2 x i32> @test41vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 104), [[ENTRY:%.*]] ], [ splat (i32 10), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1448,7 +1448,7 @@ define i32 @lowbitmask_casted_shift_use2(i8 %x) { define <2 x i59> @lowbitmask_casted_shift_vec_splat(<2 x i47> %x) { ; CHECK-LABEL: @lowbitmask_casted_shift_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i47> [[X:%.*]] to <2 x i59> -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i59> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i59> [[TMP1]], splat (i59 5) ; CHECK-NEXT: ret <2 x i59> [[R]] ; %a = ashr <2 x i47> %x, @@ -1525,10 +1525,10 @@ define i32 @not_lowmask_sext_in_reg2(i32 %x) { define <2 x i32> @lowmask_sext_in_reg_splat(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @lowmask_sext_in_reg_splat( -; CHECK-NEXT: [[L:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i32> [[L]], +; CHECK-NEXT: [[L:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 20) +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i32> [[L]], splat (i32 20) ; CHECK-NEXT: store <2 x i32> [[R]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 4095) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %l = shl <2 x i32> %x, @@ -1576,9 +1576,9 @@ define i8 @lowmask_add_2_uses(i8 %x) { define <2 x i8> @lowmask_add_2_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @lowmask_add_2_splat( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], splat (i8 63) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, ; 0xc0 @@ -1619,9 +1619,9 @@ define i8 @not_lowmask_add2(i8 %x) { define <2 x i8> @lowmask_add_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @lowmask_add_splat( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], splat (i8 32) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, ; 0xc0 @@ -1670,8 +1670,8 @@ define i8 @flip_masked_bit(i8 %A) { define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 16) +; CHECK-NEXT: [[C:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[C]] ; %B = add <2 x i8> %A, @@ -1681,7 +1681,7 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_poison( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -1692,7 +1692,7 @@ define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_nonuniform( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -2084,8 +2084,8 @@ define i16 @shl_ashr_pow2_const_case1(i16 %x) { define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 7) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2123,7 +2123,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2135,7 +2135,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) { define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison2_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2146,8 +2146,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) { define <3 x i16> @shl_lshr_pow2_const_case1_poison3_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison3_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> splat (i16 16), [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], splat (i16 5) ; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], ; CHECK-NEXT: ret <3 x i16> [[R]] ; @@ -2380,8 +2380,8 @@ define i16 @lshr_shl_pow2_const_negative_oneuse(i16 %x) { define <3 x i16> @lshr_shl_pow2_const_case1_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_uniform_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 12) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2419,7 +2419,7 @@ define <3 x i16> @lshr_shl_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2430,9 +2430,9 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) { define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison2_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> , [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], splat (i16 128) ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2443,8 +2443,8 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) { define <3 x i16> @lshr_shl_pow2_const_case1_poison3_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison3_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], splat (i16 6) ; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], ; CHECK-NEXT: ret <3 x i16> [[R]] ; @@ -2564,7 +2564,7 @@ define i32 @and_zext_multiuse(i32 %a, i1 %b) { define <2 x i32> @and_zext_vec(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -2811,8 +2811,8 @@ define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) { define <2 x i16> @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(<2 x i16> %x) { ; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = xor <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 24) +; CHECK-NEXT: [[AND:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 16) ; CHECK-NEXT: ret <2 x i16> [[AND]] ; %add = add <2 x i16> %x, diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll index 104486e7638f56a..ce49f4e8f6f12eb 100644 --- a/llvm/test/Transforms/InstCombine/and2.ll +++ b/llvm/test/Transforms/InstCombine/and2.ll @@ -83,8 +83,8 @@ define i1 @test8_logical(i32 %i) { define <2 x i1> @test8vec(<2 x i32> %i) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 13) ; CHECK-NEXT: ret <2 x i1> [[COND]] ; %cmp1 = icmp ne <2 x i32> %i, zeroinitializer @@ -107,7 +107,7 @@ define i64 @test9(i64 %x) { ; combine -x & 1 into x & 1 define <2 x i64> @test9vec(<2 x i64> %x) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 1) ; CHECK-NEXT: ret <2 x i64> [[AND]] ; %sub = sub nsw <2 x i64> , %x @@ -207,7 +207,7 @@ define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) { define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) { ; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> splat (i8 1), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = lshr <2 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/apint-add.ll b/llvm/test/Transforms/InstCombine/apint-add.ll index 7a6ffed919a4147..c4398a61debc062 100644 --- a/llvm/test/Transforms/InstCombine/apint-add.ll +++ b/llvm/test/Transforms/InstCombine/apint-add.ll @@ -36,7 +36,7 @@ define i15 @test3(i15 %x) { ; X + signbit --> X ^ signbit define <2 x i5> @test3vec(<2 x i5> %x) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> [[X:%.*]], splat (i5 -16) ; CHECK-NEXT: ret <2 x i5> [[Y]] ; %y = add <2 x i5> %x, diff --git a/llvm/test/Transforms/InstCombine/apint-mul1.ll b/llvm/test/Transforms/InstCombine/apint-mul1.ll index 2a00275db2606fb..7327630c5d0a03f 100644 --- a/llvm/test/Transforms/InstCombine/apint-mul1.ll +++ b/llvm/test/Transforms/InstCombine/apint-mul1.ll @@ -15,7 +15,7 @@ define i17 @test1(i17 %X) { define <2 x i17> @test2(<2 x i17> %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], splat (i17 10) ; CHECK-NEXT: ret <2 x i17> [[Y]] ; %Y = mul <2 x i17> %X, diff --git a/llvm/test/Transforms/InstCombine/apint-mul2.ll b/llvm/test/Transforms/InstCombine/apint-mul2.ll index 12c44755b7e4a30..b3937f0965dcf21 100644 --- a/llvm/test/Transforms/InstCombine/apint-mul2.ll +++ b/llvm/test/Transforms/InstCombine/apint-mul2.ll @@ -16,7 +16,7 @@ define i177 @test1(i177 %X) { define <2 x i177> @test2(<2 x i177> %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], splat (i177 155) ; CHECK-NEXT: ret <2 x i177> [[Y]] ; %C = shl <2 x i177> , diff --git a/llvm/test/Transforms/InstCombine/apint-select.ll b/llvm/test/Transforms/InstCombine/apint-select.ll index c96c3dc36869354..247be32751ae84f 100644 --- a/llvm/test/Transforms/InstCombine/apint-select.ll +++ b/llvm/test/Transforms/InstCombine/apint-select.ll @@ -63,7 +63,7 @@ define <2 x i32> @sext_vec(<2 x i1> %C) { define <2 x i999> @not_zext_vec(<2 x i1> %C) { ; CHECK-LABEL: @not_zext_vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> [[NOT_C]] to <2 x i999> ; CHECK-NEXT: ret <2 x i999> [[V]] ; @@ -73,7 +73,7 @@ define <2 x i999> @not_zext_vec(<2 x i1> %C) { define <2 x i64> @not_sext_vec(<2 x i1> %C) { ; CHECK-LABEL: @not_sext_vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> [[NOT_C]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[V]] ; @@ -85,7 +85,7 @@ define <2 x i64> @not_sext_vec(<2 x i1> %C) { define <2 x i32> @scalar_select_of_vectors(i1 %c) { ; CHECK-LABEL: @scalar_select_of_vectors( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[V]] ; %V = select i1 %c, <2 x i32> , <2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll index 21c6c18009d1dfd..3cc530bdbd02103 100644 --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -95,7 +95,7 @@ define i19 @test10(i19 %X) { define <2 x i19> @lshr_lshr_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @lshr_lshr_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], splat (i19 5) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = lshr <2 x i19> %X, @@ -118,8 +118,8 @@ define i9 @multiuse_lshr_lshr(i9 %x) { define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) { ; CHECK-LABEL: @multiuse_lshr_lshr_splat( -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = lshr <2 x i9> [[X]], +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], splat (i9 2) +; CHECK-NEXT: [[SH2:%.*]] = lshr <2 x i9> [[X]], splat (i9 5) ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i9> [[SH1]], [[SH2]] ; CHECK-NEXT: ret <2 x i9> [[MUL]] ; @@ -134,7 +134,7 @@ define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) { define <2 x i19> @shl_shl_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @shl_shl_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], splat (i19 5) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = shl <2 x i19> %X, @@ -157,8 +157,8 @@ define i42 @multiuse_shl_shl(i42 %x) { define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) { ; CHECK-LABEL: @multiuse_shl_shl_splat( -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = shl <2 x i42> [[X]], +; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], splat (i42 8) +; CHECK-NEXT: [[SH2:%.*]] = shl <2 x i42> [[X]], splat (i42 17) ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i42> [[SH1]], [[SH2]] ; CHECK-NEXT: ret <2 x i42> [[MUL]] ; @@ -173,7 +173,7 @@ define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) { define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @eq_shl_lshr_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], splat (i19 65535) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = shl <2 x i19> %X, @@ -186,7 +186,7 @@ define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) { define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @eq_lshr_shl_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], splat (i19 -8) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = lshr <2 x i19> %X, @@ -199,8 +199,8 @@ define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) { define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) { ; CHECK-LABEL: @lshr_shl_splat_vec( -; CHECK-NEXT: [[DOTNEG:%.*]] = mul <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = and <2 x i7> [[DOTNEG]], +; CHECK-NEXT: [[DOTNEG:%.*]] = mul <2 x i7> [[X:%.*]], splat (i7 60) +; CHECK-NEXT: [[SH2:%.*]] = and <2 x i7> [[DOTNEG]], splat (i7 60) ; CHECK-NEXT: ret <2 x i7> [[SH2]] ; %mul = mul <2 x i7> %X, @@ -214,8 +214,8 @@ define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) { define <2 x i7> @shl_lshr_splat_vec(<2 x i7> %X) { ; CHECK-LABEL: @shl_lshr_splat_vec( -; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], +; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], splat (i7 9) +; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[SH1]] ; %div = udiv <2 x i7> %X, @@ -250,7 +250,7 @@ define i47 @test12(i47 %X) { define <2 x i47> @test12_splat_vec(<2 x i47> %X) { ; CHECK-LABEL: @test12_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i47> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i47> [[X:%.*]], splat (i47 -256) ; CHECK-NEXT: ret <2 x i47> [[SH1]] ; %sh1 = ashr <2 x i47> %X, @@ -328,7 +328,7 @@ define i1 @test16(i84 %X) { define <2 x i1> @test16vec(<2 x i84> %X) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], splat (i84 16) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -375,8 +375,8 @@ define i1 @test17(i106 %A) { define <2 x i1> @test17vec(<2 x i106> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], splat (i106 -8) +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], splat (i106 9872) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i106> %A, @@ -405,7 +405,7 @@ define i1 @test19(i37 %A) { define <2 x i1> @test19vec(<2 x i37> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], splat (i37 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i37> %A, @@ -425,7 +425,7 @@ define i1 @test19a(i39 %A) { define <2 x i1> @test19a_vec(<2 x i39> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], splat (i39 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i39> %A, @@ -494,8 +494,8 @@ define i44 @shl_lshr_eq_amt_multi_use(i44 %A) { define <2 x i44> @shl_lshr_eq_amt_multi_use_splat_vec(<2 x i44> %A) { ; CHECK-LABEL: @shl_lshr_eq_amt_multi_use_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i44> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i44> [[A]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i44> [[A:%.*]], splat (i44 33) +; CHECK-NEXT: [[C:%.*]] = and <2 x i44> [[A]], splat (i44 2047) ; CHECK-NEXT: [[D:%.*]] = or disjoint <2 x i44> [[B]], [[C]] ; CHECK-NEXT: ret <2 x i44> [[D]] ; @@ -524,8 +524,8 @@ define i43 @lshr_shl_eq_amt_multi_use(i43 %A) { define <2 x i43> @lshr_shl_eq_amt_multi_use_splat_vec(<2 x i43> %A) { ; CHECK-LABEL: @lshr_shl_eq_amt_multi_use_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i43> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i43> [[A]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i43> [[A:%.*]], splat (i43 23) +; CHECK-NEXT: [[C:%.*]] = and <2 x i43> [[A]], splat (i43 -8388608) ; CHECK-NEXT: [[D:%.*]] = mul <2 x i43> [[B]], [[C]] ; CHECK-NEXT: ret <2 x i43> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/ashr-demand.ll b/llvm/test/Transforms/InstCombine/ashr-demand.ll index a0e2af93b809bfb..43593612320f643 100644 --- a/llvm/test/Transforms/InstCombine/ashr-demand.ll +++ b/llvm/test/Transforms/InstCombine/ashr-demand.ll @@ -31,8 +31,8 @@ define i32 @srem8_ashr_mask(i32 %a0) { define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) { ; CHECK-LABEL: @srem2_ashr_mask_vector( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, @@ -43,8 +43,8 @@ define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) { define <2 x i32> @srem2_ashr_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @srem2_ashr_mask_vector_nonconstant( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index 1abf1be2cbedd94..60debf02b586212 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -419,7 +419,7 @@ define i32 @ashr_lshr_no_lshr(i32 %x, i32 %y) { define <2 x i32> @ashr_lshr_vec_wrong_pred(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @ashr_lshr_vec_wrong_pred( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[L:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[L]], <2 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/avg-lsb.ll b/llvm/test/Transforms/InstCombine/avg-lsb.ll index 1e9e4e3bcafb272..f356dd1cab11ef7 100644 --- a/llvm/test/Transforms/InstCombine/avg-lsb.ll +++ b/llvm/test/Transforms/InstCombine/avg-lsb.ll @@ -34,7 +34,7 @@ define i8 @avg_lsb_mismatch(i8 %a, i8 %b) { define <2 x i8> @avg_lsb_vector(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: define <2 x i8> @avg_lsb_vector( ; CHECK-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) { -; CHECK-NEXT: [[REM:%.*]] = and <2 x i8> [[A]], +; CHECK-NEXT: [[REM:%.*]] = and <2 x i8> [[A]], splat (i8 1) ; CHECK-NEXT: [[DIV2:%.*]] = and <2 x i8> [[B]], [[REM]] ; CHECK-NEXT: ret <2 x i8> [[DIV2]] ; diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll index 4b5de41fc7095dd..75bdf66ab642840 100644 --- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll @@ -336,7 +336,7 @@ define <2 x i8> @lshr_add_or_fail_dif_masks(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_or_or_good_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; @@ -350,7 +350,7 @@ define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_or_or_fail_bad_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; @@ -394,7 +394,7 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_xor_good_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], +; CHECK-NEXT: [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; %shift1 = lshr <2 x i8> %x, @@ -406,8 +406,8 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @lshr_or_xor_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_xor_fail_bad_mask( -; CHECK-NEXT: [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 6) +; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6) ; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[SHIFT2]], ; CHECK-NEXT: [[BW1:%.*]] = xor <2 x i8> [[SHIFT1]], [[BW2]] ; CHECK-NEXT: ret <2 x i8> [[BW1]] @@ -633,7 +633,7 @@ define i8 @and_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) { define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -647,7 +647,7 @@ define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec_commuted( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -661,7 +661,7 @@ define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s define <4 x i8> @and_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec_poison_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -767,7 +767,7 @@ define i8 @or_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) { define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -781,7 +781,7 @@ define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec_commuted( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -795,7 +795,7 @@ define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh define <4 x i8> @or_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec_poison_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -902,7 +902,7 @@ define <4 x i8> @xor_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @xor_ashr_not_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt @@ -916,7 +916,7 @@ define <4 x i8> @xor_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ; CHECK-LABEL: @xor_ashr_not_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt @@ -930,7 +930,7 @@ define <4 x i8> @xor_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ; CHECK-LABEL: @xor_ashr_not_vec_poison_1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt diff --git a/llvm/test/Transforms/InstCombine/binop-cast.ll b/llvm/test/Transforms/InstCombine/binop-cast.ll index 9d3b18c5e79ed59..7330d4409829950 100644 --- a/llvm/test/Transforms/InstCombine/binop-cast.ll +++ b/llvm/test/Transforms/InstCombine/binop-cast.ll @@ -151,7 +151,7 @@ define i32 @or_sext_to_sel(i32 %x, i1 %y) { define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) { ; CHECK-LABEL: @or_sext_to_sel_constant_vec( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %sext = sext <2 x i1> %y to <2 x i32> @@ -162,7 +162,7 @@ define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) { define <2 x i32> @or_sext_to_sel_swap(<2 x i32> %px, <2 x i1> %y) { ; CHECK-LABEL: @or_sext_to_sel_swap( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[PX:%.*]], [[PX]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> , <2 x i32> [[X]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[X]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %x = mul <2 x i32> %px, %px ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll index a16ad4ddb806f67..e10f83f5eee0baf 100644 --- a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll @@ -179,7 +179,7 @@ define i8 @shl_or_commuted(i8 %x) { define <2 x i8> @shl_or_splat(<2 x i8> %x) { ; CHECK-LABEL: define <2 x i8> @shl_or_splat ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> , [[X]] +; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> splat (i8 22), [[X]] ; CHECK-NEXT: ret <2 x i8> [[BINOP]] ; %shift = shl <2 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll index b57b96db306ccc7..a3cd0328e9b5906 100644 --- a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll +++ b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll @@ -130,7 +130,7 @@ define i64 @select_zext_different_condition(i1 %c, i1 %d) { define <2 x i64> @vector_test(i1 %c) { ; CHECK-LABEL: define <2 x i64> @vector_test ; CHECK-SAME: (i1 [[C:%.*]]) { -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], <2 x i64> , <2 x i64> +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], <2 x i64> splat (i64 64), <2 x i64> splat (i64 1) ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[C]] to i64 ; CHECK-NEXT: [[VEC0:%.*]] = insertelement <2 x i64> poison, i64 [[EXT]], i64 0 ; CHECK-NEXT: [[VEC1:%.*]] = shufflevector <2 x i64> [[VEC0]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -215,8 +215,8 @@ define i6 @sub_select_zext_op_swapped_non_const_args(i1 %c, i6 %argT, i6 %argF) define <2 x i8> @vectorized_add(<2 x i1> %c, <2 x i8> %arg) { ; CHECK-LABEL: define <2 x i8> @vectorized_add ; CHECK-SAME: (<2 x i1> [[C:%.*]], <2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[ARG]], -; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[ARG]], splat (i8 1) +; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %zext = zext <2 x i1> %c to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll index 6cd4132eadd77b9..25f624ee134126f 100644 --- a/llvm/test/Transforms/InstCombine/binop-select.ll +++ b/llvm/test/Transforms/InstCombine/binop-select.ll @@ -113,7 +113,7 @@ define i32 @test_sub_deduce_false(i32 %x, i32 %y) { define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @test_sub_dont_deduce_with_undef_cond_vec( ; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]]) ; CHECK-NEXT: ret <2 x i8> [[SUB]] ; @@ -126,7 +126,7 @@ define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8> define <2 x i8> @test_sub_dont_deduce_with_poison_cond_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @test_sub_dont_deduce_with_poison_cond_vec( ; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]]) ; CHECK-NEXT: ret <2 x i8> [[SUB]] ; @@ -369,7 +369,7 @@ define <2 x half> @fmul_sel_op1(i1 %b, <2 x half> %p) { define <2 x half> @fmul_sel_op1_use(i1 %b, <2 x half> %p) { ; CHECK-LABEL: @fmul_sel_op1_use( ; CHECK-NEXT: [[X:%.*]] = fadd <2 x half> [[P:%.*]], -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> +; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> splat (half 0xHFFFF) ; CHECK-NEXT: call void @use_v2f16(<2 x half> [[S]]) ; CHECK-NEXT: [[R:%.*]] = fmul nnan nsz <2 x half> [[X]], [[S]] ; CHECK-NEXT: ret <2 x half> [[R]] diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll index 3e3426d951eb9e3..43cd6dd1211b001 100644 --- a/llvm/test/Transforms/InstCombine/bit-checks.ll +++ b/llvm/test/Transforms/InstCombine/bit-checks.ll @@ -290,8 +290,8 @@ define i32 @main4(i32 %argc) { define <2 x i32> @main4_splat(<2 x i32> %argc) { ; CHECK-LABEL: @main4_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], -; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], splat (i32 55) +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 55) ; CHECK-NEXT: [[STOREMERGE:%.*]] = zext <2 x i1> [[AND_COND]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[STOREMERGE]] ; @@ -1356,7 +1356,7 @@ define <2 x i1> @no_masks_with_logical_or_vec_poison1(<2 x i32> %a, <2 x i32> %b ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> , <2 x i1> [[CMP2]] +; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> splat (i1 true), <2 x i1> [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR2]] ; %cmp1 = icmp ne <2 x i32> %a, @@ -1371,8 +1371,8 @@ define <2 x i1> @no_masks_with_logical_or_vec_poison2(<2 x i32> %a, <2 x i32> %b ; CHECK-LABEL: @no_masks_with_logical_or_vec_poison2( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], -; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> , <2 x i1> [[CMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> splat (i1 true), <2 x i1> [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR2]] ; %cmp1 = icmp ne <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index 79665be01576a78..0551a5cb5e2f274 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -268,11 +268,11 @@ define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) { ; a vector version of @bit_ceil_32 above define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_ceil_v4i32( -; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]] -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], -; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 31) +; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SEL]] ; %dec = add <4 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll index bd8aabf4431c0a3..2872221e8aa87cf 100644 --- a/llvm/test/Transforms/InstCombine/bit_floor.ll +++ b/llvm/test/Transforms/InstCombine/bit_floor.ll @@ -151,10 +151,10 @@ define i32 @bit_floor_shl_used_twice(i32 %x, ptr %p) { define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_floor_v4i32( ; CHECK-NEXT: [[EQ0:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], splat (i32 1) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 1, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[LSHR]], i1 false) -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> , [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> , [[SUB]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> splat (i32 32), [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[SUB]] ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[EQ0]], <4 x i32> zeroinitializer, <4 x i32> [[SHL]] ; CHECK-NEXT: ret <4 x i32> [[SEL]] ; diff --git a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll index 3744d8c9171c7d3..49e77009f3b1a0e 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll @@ -75,7 +75,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { define <2 x i64> @is_negative(<4 x i32> %x) { ; CHECK-LABEL: @is_negative( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOTNOT]] ; @@ -91,7 +91,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) { define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { ; CHECK-LABEL: @is_negative_bonus_bitcast( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[X_LOBIT]] ; %lobit = ashr <4 x i32> %x, @@ -107,7 +107,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) { ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %a = and <4 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 79e6370b7242f57..ce5a4635a9b0a2f 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -77,7 +77,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { define <2 x i64> @is_negative(<4 x i32> %x) { ; CHECK-LABEL: @is_negative( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOTNOT]] ; @@ -93,7 +93,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) { define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { ; CHECK-LABEL: @is_negative_bonus_bitcast( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[X_LOBIT]] ; %lobit = ashr <4 x i32> %x, @@ -109,7 +109,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) { ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %a = and <4 x i4> %x, @@ -796,7 +796,7 @@ define double @copysign_idiom_f64(double %x, i64 %mag) { define <2 x float> @copysign_idiom_vec(<2 x float> %x) { ; CHECK-LABEL: @copysign_idiom_vec( -; CHECK-NEXT: [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[Y]] ; %bits = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/bitreverse.ll b/llvm/test/Transforms/InstCombine/bitreverse.ll index fe44a7a77bdff86..2d303efbe66c825 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse.ll @@ -362,7 +362,7 @@ define i32 @rev_i1(i1 %x) { define <2 x i8> @rev_v2i1(<2 x i1> %x) { ; CHECK-LABEL: @rev_v2i1( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> , <2 x i8> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> splat (i8 -128), <2 x i8> zeroinitializer ; CHECK-NEXT: ret <2 x i8> [[R]] ; %z = zext <2 x i1> %x to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index ddc0430896e7d1b..f7268ec9df09052 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -38,7 +38,7 @@ define i32 @lshr8_i32(i32 %x) { define <2 x i32> @lshr16_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @lshr16_v2i32( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = lshr <2 x i32> %x, @@ -222,7 +222,7 @@ define i16 @test7(i32 %A) { define <2 x i16> @test7_vector(<2 x i32> %A) { ; CHECK-LABEL: @test7_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; @@ -246,7 +246,7 @@ define i16 @test8(i64 %A) { define <2 x i16> @test8_vector(<2 x i64> %A) { ; CHECK-LABEL: @test8_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 48) ; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i64> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; @@ -447,7 +447,7 @@ define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_and32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -458,7 +458,7 @@ define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_or32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -469,7 +469,7 @@ define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_xor32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -837,8 +837,8 @@ define i32 @bs_active_high7(i32 %0) { define <2 x i64> @bs_active_high4(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high4( -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4) +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -849,7 +849,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) { define <2 x i64> @bs_active_high_different(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high_different( ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], splat (i64 56) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -932,7 +932,7 @@ define i32 @bs_active_byte_3h(i32 %0) { define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_byte_3h_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], splat (i32 8) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, ; 0x0080'0000, 0x0001'0000 @@ -966,7 +966,7 @@ define i16 @bs_active_low1(i16 %0) { define <2 x i32> @bs_active_low8(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low8( -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = and <2 x i32> %0, @@ -977,7 +977,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) { define <2 x i32> @bs_active_low_different(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low_different( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, @@ -1060,7 +1060,7 @@ define i32 @bs_active_byte_2l(i32 %0) { define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_byte_2l_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], splat (i64 40) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = and <2 x i64> %0, ; 0x0100, 0xff00 diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index fa61bc841f0199e..44e1616af86c549 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -110,10 +110,10 @@ define <2 x i32> @test2_vector(<2 x i32> %arg) { define <2 x i32> @test2_vector_poison(<2 x i32> %arg) { ; CHECK-LABEL: @test2_vector_poison( ; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], -; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], +; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], splat (i32 8) ; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], ; CHECK-NEXT: [[T6:%.*]] = or disjoint <2 x i32> [[T2]], [[T5]] -; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 8) ; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], ; CHECK-NEXT: [[T10:%.*]] = or disjoint <2 x i32> [[T6]], [[T9]] ; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], diff --git a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll index cde8efbafc5e5a8..b7aed751a989778 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll @@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_ashrexact(i8 %x) { define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) { define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggerashr_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerashr_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 6) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) { define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) { define <2 x i8> @positive_biggerLashr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerLashr_vec_nonsplat( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll index b5ef1f466958d7c..42c116f9d8f08c0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll @@ -450,8 +450,8 @@ define i32 @n19_oneuse9(i32 %x, i32 %replacement_low, i32 %replacement_high) { define <2 x i32> @t20_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t20_ult_slt_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 -16) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 127) ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]] ; CHECK-NEXT: ret <2 x i32> [[R]] @@ -486,9 +486,9 @@ declare void @use2xi1(<2 x i1>) declare void @use(<2 x i1>) define <2 x i32> @t22_uge_slt(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t22_uge_slt( -; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[REPLACEMENT_HIGH:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], splat (i32 16) ; CHECK-NEXT: [[T3:%.*]] = icmp uge <2 x i32> [[T2]], ; CHECK-NEXT: call void @use2xi1(<2 x i1> [[T3]]) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T1]], <2 x i32> [[X]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll index c555970ea43484b..58f171e3889f66e 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll @@ -312,7 +312,7 @@ define i32 @n16_oneuse6(i32 %x, i32 %replacement_low, i32 %replacement_high) { define <2 x i32> @t17_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t17_ult_slt_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]] ; CHECK-NEXT: ret <2 x i32> [[R]] @@ -341,7 +341,7 @@ define <2 x i32> @t18_ult_slt_vec_nonsplat(<2 x i32> %x, <2 x i32> %replacement_ define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t19_ult_slt_vec_poison0( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] @@ -355,7 +355,7 @@ define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_l define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t20_ult_slt_vec_poison1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] @@ -369,7 +369,7 @@ define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_l define <3 x i32> @t21_ult_slt_vec_poison2(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t21_ult_slt_vec_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 3d5696a02451395..8a7f1d259fe479c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index 21daeb8983a85de..5f5a393641f205c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll index e103fe94409869c..0a0656a2273cd0a 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -60,7 +60,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll index bbd733e86a32deb..14d634c493780f0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll @@ -39,7 +39,7 @@ define i1 @p0() { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll index b167c8ad25aa94a..c563c7c72b9c5ce 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll @@ -39,7 +39,7 @@ define i1 @p0() { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() @@ -75,7 +75,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase() { define <3 x i1> @p3_vec_splat_poison() { ; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll index 828150244773282..a5aa887a0cb6ac2 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll index 1dac73df3878966..c6a69688d0287e8 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll index e8781407cb647fd..1a1a1d08741bed4 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll @@ -51,7 +51,7 @@ define i1 @pv(i8 %y) { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll index 54f00321c4cf020..fe9f26f26a18777 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll @@ -51,7 +51,7 @@ define i1 @pv(i8 %y) { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() @@ -98,7 +98,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1() { define <3 x i1> @p3_vec_splat_poison() { ; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll index 7eda7bb58f27008..9a0f073071b17bd 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll index 297dc647e7c67f2..a85d7932f9b7efb 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll @@ -180,7 +180,7 @@ define i1 @oge_pinf_fmf(half %x) { define <2 x i1> @olt_pinf_vec(<2 x half> %x) { ; CHECK-LABEL: define <2 x i1> @olt_pinf_vec( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = fcmp one <2 x half> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp one <2 x half> [[X]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = fcmp olt <2 x half> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll index b706ff15f569cbd..7bc72b8a6c2d9b0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll @@ -44,8 +44,8 @@ define i1 @pb(i65 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 8) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp0 = shl <2 x i8> %x, @@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) { define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] ; @@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { define <3 x i1> @p4_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @p4_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] @@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 5a58fc96c6643fc..67096e19e9a02f0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP0]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index edd528b500e5578..6f7b5c2a42c2e9c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP0]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll index fd56324f10dc383..8e1e5a56a59d022 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer @@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p3_vec_poison0( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll index 4d8ce5d9a6cca0d..8d6dfecffcd14a8 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer @@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p3_vec_poison0( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll index 5fab93092a050e5..12aa83636223e2c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll @@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <3 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll index 40a67ce1d60cb47..4e840a6fd1b6404 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll @@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll index 719fdd036a01b64..2806bf69a24f2c0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll @@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_lshrexact(i8 %x) { define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[T0:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[T0]] ; %t0 = lshr <2 x i8> %x, @@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 24) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, @@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) { define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggerlshr_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, @@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 6) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) { define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) { define <2 x i8> @positive_biggerLlshr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerLlshr_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll index c1e871dcccddcf2..400d911929b9968 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll @@ -124,7 +124,7 @@ define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) { define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> splat (i32 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -139,7 +139,7 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[T0:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727) ; CHECK-NEXT: ret <2 x i32> [[T0]] ; %t0 = shl <2 x i32> %x, @@ -150,7 +150,7 @@ define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -160,7 +160,7 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -181,8 +181,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 134217696) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, @@ -193,7 +193,7 @@ define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) { define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -203,7 +203,7 @@ define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -225,8 +225,8 @@ define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) { define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 5) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 4194303) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, @@ -237,7 +237,7 @@ define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) { define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 10) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -247,7 +247,7 @@ define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -346,7 +346,7 @@ define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) { define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -358,7 +358,7 @@ define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) { define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll index b9875fb9780fa9e..5a12ffede78a572 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll @@ -44,8 +44,8 @@ define i1 @pb(i65 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp0 = shl <2 x i8> %x, @@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) { define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] ; @@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { define <3 x i1> @p4_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @p4_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] @@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize.ll b/llvm/test/Transforms/InstCombine/canonicalize.ll index 0905898e2f44bc9..d571ae93ec425c4 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize.ll @@ -316,7 +316,7 @@ define i1 @canonicalize_ueq_arg_f32(float %x) { define <2 x i1> @canonicalize_ueq_arg_v2f32(<2 x float> %x) { ; CHECK-LABEL: @canonicalize_ueq_arg_v2f32( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %canon.x = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x) %cmp = fcmp ueq <2 x float> %canon.x, %x diff --git a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll index 68a386ec004237b..dfefa11b70a0208 100644 --- a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll @@ -533,7 +533,7 @@ define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_n0_sitofp(<2 x i32> %i) { define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp(<2 x i32> %i) { ; CHECK-LABEL: @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp( ; CHECK-NEXT: [[F:%.*]] = sitofp <2 x i32> [[I:%.*]] to <2 x float> -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], splat (float 0x41E0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %f = sitofp <2 x i32> %i to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll index 7b6d07a14a30e64..e9f6feba24cf679 100644 --- a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll @@ -631,7 +631,7 @@ define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_poison(<3 x i32> %i) { define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) { ; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], splat (i64 1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i64> %i to <3 x half> @@ -642,7 +642,7 @@ define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) { define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison(<3 x i16> %i) { ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], splat (i16 -1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i16> %i to <3 x float> @@ -659,7 +659,7 @@ define <6 x i1> @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(<3 x ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp( ; CHECK-NEXT: [[F:%.*]] = sitofp <3 x i16> [[I:%.*]] to <3 x float> ; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to <6 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], splat (i16 -1) ; CHECK-NEXT: ret <6 x i1> [[CMP]] ; %f = sitofp <3 x i16> %i to <3 x float> diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 0e44dc1b8ca9c0a..ca748a9483e9b28 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -223,7 +223,7 @@ define <2 x i1> @test19vec(<2 x i32> %X) { define <3 x i1> @test19vec2(<3 x i1> %X) { ; ALL-LABEL: @test19vec2( -; ALL-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], +; ALL-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; ALL-NEXT: ret <3 x i1> [[CMPEQ]] ; %sext = sext <3 x i1> %X to <3 x i32> @@ -347,8 +347,8 @@ define i1 @test31(i64 %A) { define <2 x i1> @test31vec(<2 x i64> %A) { ; ALL-LABEL: @test31vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = and <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], +; ALL-NEXT: [[C:%.*]] = and <2 x i32> [[B]], splat (i32 42) +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], splat (i32 10) ; ALL-NEXT: ret <2 x i1> [[D]] ; %B = trunc <2 x i64> %A to <2 x i32> @@ -362,8 +362,8 @@ define <2 x i1> @test31vec(<2 x i64> %A) { define <2 x i1> @test32vec(<2 x i8> %A) { ; ALL-LABEL: @test32vec( -; ALL-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], +; ALL-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 42) +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 10) ; ALL-NEXT: ret <2 x i1> [[D]] ; %B = zext <2 x i8> %A to <2 x i16> @@ -417,7 +417,7 @@ define i1 @test36(i32 %a) { define <2 x i1> @test36vec(<2 x i32> %a) { ; ALL-LABEL: @test36vec( -; ALL-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1) ; ALL-NEXT: ret <2 x i1> [[D]] ; %b = lshr <2 x i32> %a, @@ -480,8 +480,8 @@ define i16 @test40(i16 %a) { define <2 x i16> @test40vec(<2 x i16> %a) { ; ALL-LABEL: @test40vec( -; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], -; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], +; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 9) +; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], splat (i16 8) ; ALL-NEXT: [[T32:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]] ; ALL-NEXT: ret <2 x i16> [[T32]] ; @@ -615,8 +615,8 @@ define i64 @test46(i64 %A) { define <2 x i64> @test46vec(<2 x i64> %A) { ; ALL-LABEL: @test46vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = and <2 x i32> [[C]], +; ALL-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], splat (i32 8) +; ALL-NEXT: [[D:%.*]] = and <2 x i32> [[C]], splat (i32 10752) ; ALL-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[D]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[E]] ; @@ -773,7 +773,7 @@ define i64 @test56(i16 %A) { define <2 x i64> @test56vec(<2 x i16> %A) { ; ALL-LABEL: @test56vec( ; ALL-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], +; ALL-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], splat (i32 5) ; ALL-NEXT: [[P355:%.*]] = zext nneg <2 x i32> [[P354]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[P355]] ; @@ -798,7 +798,7 @@ define i64 @test57(i64 %A) { define <2 x i64> @test57vec(<2 x i64> %A) { ; ALL-LABEL: @test57vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[C]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[E]] ; @@ -1506,7 +1506,7 @@ define i8 @trunc_lshr_sext_exact(i8 %A) { define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uniform( -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1552,7 +1552,7 @@ define <2 x i8> @trunc_lshr_sext_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1581,9 +1581,9 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8 >%A to <2 x i32> @@ -1596,7 +1596,7 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) { define <2 x i8> @trunc_lshr_overshift_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext( -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1622,9 +1622,9 @@ define i8 @trunc_lshr_overshift_sext_uses1(i8 %A) { define <2 x i8> @trunc_lshr_overshift_sext_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 7) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1679,7 +1679,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses1(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A]], splat (i16 9) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1709,7 +1709,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 9) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] @@ -1724,7 +1724,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) { define <2 x i8> @trunc_lshr_overshift_wide_input_sext(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_overshift_wide_input_sext( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1751,7 +1751,7 @@ define i8 @trunc_lshr_overshift_sext_wide_input_uses1(i16 %A) { define <2 x i8> @trunc_lshr_overshift_sext_wide_input_uses2(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_wide_input_uses2( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15) ; ALL-NEXT: [[C:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> @@ -1797,7 +1797,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] ; @@ -1827,7 +1827,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] @@ -1842,7 +1842,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) { define <2 x i16> @trunc_lshr_overshift_narrow_input_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_narrow_input_sext( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7) ; ALL-NEXT: [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] ; @@ -1870,7 +1870,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses1(i8 %A) { define <2 x i16> @trunc_lshr_overshift_sext_narrow_input_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_narrow_input_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] @@ -1902,7 +1902,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses3(i8 %A) { define <2 x i8> @trunc_lshr_overshift2_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift2_sext( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25) ; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1930,7 +1930,7 @@ define i8 @trunc_lshr_overshift2_sext_uses1(i8 %A) { define <2 x i8> @trunc_lshr_overshift2_sext_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift2_sext_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] @@ -1983,7 +1983,7 @@ define i8 @trunc_lshr_zext_exact(i8 %A) { define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_uniform( -; ALL-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[TMP1]] ; %B = zext <2 x i8> %A to <2 x i32> @@ -2029,7 +2029,7 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_uses1( ; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[C]] ; %B = zext <2 x i8> %A to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll index c6fee0914f0e781..a81259b147fb752 100644 --- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll +++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -567,8 +567,8 @@ define i32 @mixed_clamp_to_i32_2(float %x) { define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) { ; CHECK-LABEL: @mixed_clamp_to_float_vec( -; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> ) +; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255)) +; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll index 67815e41ecd34e3..19c4cc979d4ba54 100644 --- a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -43,7 +43,7 @@ define i1 @bswap_ne_i32(i32 %x) { define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) { ; CHECK-LABEL: @bswap_eq_v2i64( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 216172782113783808) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x) @@ -73,7 +73,7 @@ define i1 @ctlz_eq_zero_i32(i32 %x) { define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_zero_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -93,7 +93,7 @@ define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) { define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -114,8 +114,8 @@ define i1 @ctlz_eq_other_i32(i32 %x) { define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -128) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -211,7 +211,7 @@ define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) { define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_other_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -223,7 +223,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32( ; CHECK-NEXT: [[LZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: store <2 x i32> [[LZ]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -234,7 +234,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -285,7 +285,7 @@ define i1 @cttz_eq_zero_i33(i33 %x) { define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_zero_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -306,7 +306,7 @@ define i1 @cttz_eq_bw_minus_1_i33(i33 %x) { define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -327,8 +327,8 @@ define i1 @cttz_eq_other_i33(i33 %x) { define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 31) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -417,7 +417,7 @@ define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) { define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -430,7 +430,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @cttz_ult_other_multiuse_v2i32( ; CHECK-NEXT: [[TZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: store <2 x i32> [[TZ]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TZ]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TZ]], splat (i32 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false) @@ -441,7 +441,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -492,7 +492,7 @@ define i1 @ctpop_eq_bitwidth_i8(i8 %x) { define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) @@ -515,7 +515,7 @@ define i1 @ctpop_ugt_bitwidth_minus_one_i8(i8 %x, ptr %p) { define <2 x i1> @ctpop_ult_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_ult_bitwidth_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) @@ -792,7 +792,7 @@ define i1 @bitreverse_ult_22_fail_not_equality_pred(i8 %x) { define <2 x i1> @bitreverse_vec_eq_2_2(<2 x i8> %x) { ; CHECK-LABEL: @bitreverse_vec_eq_2_2( -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 64) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> %x) diff --git a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll index ed8bfd5d669d9eb..dcd79f58390023e 100644 --- a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll @@ -203,7 +203,7 @@ define i1 @class_inf_or_fcmp_issubnormal(half %x) { define <2 x i1> @class_finite_or_fcmp_issubnormal_vector(<2 x half> %x) { ; CHECK-LABEL: @class_finite_or_fcmp_issubnormal_vector( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], +; CHECK-NEXT: [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll index ede652869212c10..9703b47b44d0c29 100644 --- a/llvm/test/Transforms/InstCombine/compare-signs.ll +++ b/llvm/test/Transforms/InstCombine/compare-signs.ll @@ -50,7 +50,7 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -64,7 +64,7 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_poison1( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 16777216) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -78,7 +78,7 @@ define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_poison2( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 131072) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -93,8 +93,8 @@ define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone define <2 x i32> @test3vec_diff(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_diff( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31) +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 30) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] @@ -154,7 +154,7 @@ define i1 @test4a(i32 %a) { define <2 x i1> @test4a_vec(<2 x i32> %a) { ; CHECK-LABEL: @test4a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %l = ashr <2 x i32> %a, @@ -194,7 +194,7 @@ define i1 @test4c(i64 %a) { define <2 x i1> @test4c_vec(<2 x i64> %a) { ; CHECK-LABEL: @test4c_vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], splat (i64 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %l = ashr <2 x i64> %a, @@ -221,11 +221,11 @@ define i1 @shift_trunc_signbit_test(i32 %x) { define <2 x i1> @shift_trunc_signbit_test_vec_uses(<2 x i17> %x, ptr %p1, ptr %p2) { ; CHECK-LABEL: @shift_trunc_signbit_test_vec_uses( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i17> [[X:%.*]], splat (i17 4) ; CHECK-NEXT: store <2 x i17> [[SH]], ptr [[P1:%.*]], align 8 ; CHECK-NEXT: [[TR:%.*]] = trunc nuw <2 x i17> [[SH]] to <2 x i13> ; CHECK-NEXT: store <2 x i13> [[TR]], ptr [[P2:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i17> [[X]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i17> [[X]], splat (i17 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %sh = lshr <2 x i17> %x, diff --git a/llvm/test/Transforms/InstCombine/compare-udiv.ll b/llvm/test/Transforms/InstCombine/compare-udiv.ll index 20e719cafa6abd8..6b8108dc8ecfa97 100644 --- a/llvm/test/Transforms/InstCombine/compare-udiv.ll +++ b/llvm/test/Transforms/InstCombine/compare-udiv.ll @@ -92,7 +92,7 @@ define i1 @test5(i32 %d) { define <2 x i1> @test5vec(<2 x i32> %d) { ; CHECK-LABEL: @test5vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %div = udiv <2 x i32> , %d %cmp1 = icmp ne <2 x i32> %div, zeroinitializer @@ -111,7 +111,7 @@ define i1 @test6(i32 %d) { define <2 x i1> @test6vec(<2 x i32> %d) { ; CHECK-LABEL: @test6vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 6) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -150,7 +150,7 @@ define i1 @test8(i32 %d) { define <2 x i1> @test8vec(<2 x i32> %d) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -170,7 +170,7 @@ define i1 @test9(i32 %d) { define <2 x i1> @test9vec(<2 x i32> %d) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -190,7 +190,7 @@ define i1 @test10(i32 %d) { define <2 x i1> @test10vec(<2 x i32> %d) { ; CHECK-LABEL: @test10vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -210,7 +210,7 @@ define i1 @test11(i32 %d) { define <2 x i1> @test11vec(<2 x i32> %d) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -230,7 +230,7 @@ define i1 @test12(i32 %d) { define <2 x i1> @test12vec(<2 x i32> %d) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -250,7 +250,7 @@ define i1 @test13(i32 %d) { define <2 x i1> @test13vec(<2 x i32> %d) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -270,7 +270,7 @@ define i1 @test14(i32 %d) { define <2 x i1> @test14vec(<2 x i32> %d) { ; CHECK-LABEL: @test14vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -309,7 +309,7 @@ define i1 @test16(i32 %d) { define <2 x i1> @test16vec(<2 x i32> %d) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %div = udiv <2 x i32> , %d %cmp1 = icmp ult <2 x i32> %div, diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll index 22a7c0c072d9635..3e5468d2f8bed5e 100644 --- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll @@ -62,7 +62,7 @@ define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0 define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) { ; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M0]], splat (i64 -2) ; CHECK-NEXT: [[P1:%.*]] = call align 2 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x ptr> [[P1]] ; diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll index a63eeab6a4b1e9c..ce3355b6df039e9 100644 --- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll +++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll @@ -333,7 +333,7 @@ entry: define <4 x half> @copysign_splat(<4 x half> %a) { ; CHECK-LABEL: @copysign_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> , <4 x half> [[A:%.*]]) +; CHECK-NEXT: [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> splat (half 0xH3C00), <4 x half> [[A:%.*]]) ; CHECK-NEXT: ret <4 x half> [[RET]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll index 12c608cd6c2bb98..74a1e318d77edee 100644 --- a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll @@ -1779,7 +1779,7 @@ define i1 @not_isfinite_or_zero_f16_daz(half %x) #1 { define <2 x i1> @not_isfinite_or_zero_v2f16_daz(<2 x half> %x) #1 { ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_daz( ; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], +; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00) ; CHECK-NEXT: [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer ; CHECK-NEXT: [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]] ; CHECK-NEXT: ret <2 x i1> [[CLASS]] @@ -1810,7 +1810,7 @@ define i1 @not_isfinite_or_zero_f16_dynamic(half %x) #2 { define <2 x i1> @not_isfinite_or_zero_v2f16_dynamic(<2 x half> %x) #2 { ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_dynamic( ; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], +; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00) ; CHECK-NEXT: [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer ; CHECK-NEXT: [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]] ; CHECK-NEXT: ret <2 x i1> [[CLASS]] diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll index 15aa87f72c49a1a..bdceeb5ef9785cb 100644 --- a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll @@ -64,7 +64,7 @@ define i32 @shl_cttz_true(i32) { define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], +; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], splat (i32 9) ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %div = lshr <2 x i32> , %0 @@ -75,7 +75,7 @@ define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) { define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> , [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]] ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %shl = shl nuw <2 x i32> , %0 @@ -86,7 +86,7 @@ define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) { define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> , [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]] ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %shl = shl nuw nsw <2 x i32> , %0 diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll index bcfbce8dfd3d222..4c5d7a7dc011baa 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll @@ -21,7 +21,7 @@ define i32 @ctpop1(i32 %0) { define <2 x i32> @ctpop1v(<2 x i32> %0) { ; CHECK-LABEL: @ctpop1v( ; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false) -; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw <2 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = sub <2 x i32> zeroinitializer, %0 diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll index 17997b25d096c14..e86d32de51f3675 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll @@ -102,7 +102,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> , [[X1]] ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 1, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -116,7 +116,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec_nz( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %and = and <2 x i32> %x, %shl = shl <2 x i32> , %and @@ -140,7 +140,7 @@ define <2 x i64> @ctpop_x_and_negx_vec(<2 x i64> %x) { define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %x1 = or <2 x i32> %x, %sub = sub <2 x i32> , %x1 diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 940bb868c4c6158..217a3c308a2ddbe 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -105,9 +105,9 @@ define i8 @mask_one_bit(i8 %x) { define <2 x i32> @mask_one_bit_splat(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @mask_one_bit_splat( -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2048) ; CHECK-NEXT: store <2 x i32> [[A]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], splat (i32 11) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = and <2 x i32> %x, @@ -146,7 +146,7 @@ define i7 @_parity_of_not_odd_type(i7 %x) { define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) { ; CHECK-LABEL: @_parity_of_not_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %neg = xor <2 x i32> %x, @@ -158,7 +158,7 @@ define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) { define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) { ; CHECK-LABEL: @_parity_of_not_poison( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %neg = xor <2 x i32> %x, @@ -213,7 +213,7 @@ define i32 @ctpop_add_no_common_bits(i32 %a, i32 %b) { define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> splat (i32 16)) ; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[RES]] ; @@ -227,9 +227,9 @@ define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) { define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, ptr %p) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use( -; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]]) -; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]]) ; CHECK-NEXT: store <2 x i32> [[CTPOP2]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]] @@ -246,10 +246,10 @@ define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, p define <2 x i32> @ctpop_add_no_common_bits_vec_use2(<2 x i32> %a, <2 x i32> %b, ptr %p) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use2( -; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]]) ; CHECK-NEXT: store <2 x i32> [[CTPOP1]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]]) ; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]] ; CHECK-NEXT: ret <2 x i32> [[RES]] @@ -321,7 +321,7 @@ define i8 @sub_ctpop_unknown(i8 %a, i8 %b) { define <2 x i32> @sub_ctpop_vec(<2 x i32> %a) { ; CHECK-LABEL: @sub_ctpop_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[RES]] ; @@ -334,7 +334,7 @@ define <2 x i32> @sub_ctpop_vec_extra_use(<2 x i32> %a, ptr %p) { ; CHECK-LABEL: @sub_ctpop_vec_extra_use( ; CHECK-NEXT: [[CNT:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A:%.*]]) ; CHECK-NEXT: store <2 x i32> [[CNT]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw <2 x i32> , [[CNT]] +; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[CNT]] ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) @@ -412,7 +412,7 @@ define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @parity_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) -; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], +; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[I4]] ; %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg) diff --git a/llvm/test/Transforms/InstCombine/demorgan.ll b/llvm/test/Transforms/InstCombine/demorgan.ll index 460758d512bb38b..fa44694b278fa7e 100644 --- a/llvm/test/Transforms/InstCombine/demorgan.ll +++ b/llvm/test/Transforms/InstCombine/demorgan.ll @@ -428,7 +428,7 @@ define i32 @demorgan_and_zext(i1 %X, i1 %Y) { define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) { ; CHECK-LABEL: @demorgan_or_zext_vec( ; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], +; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], splat (i1 true) ; CHECK-NEXT: [[OR:%.*]] = zext <2 x i1> [[OR1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[OR]] ; @@ -443,7 +443,7 @@ define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) { define <2 x i32> @demorgan_and_zext_vec(<2 x i1> %X, <2 x i1> %Y) { ; CHECK-LABEL: @demorgan_and_zext_vec( ; CHECK-NEXT: [[AND1_DEMORGAN:%.*]] = or <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], +; CHECK-NEXT: [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], splat (i1 true) ; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[AND1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[AND]] ; diff --git a/llvm/test/Transforms/InstCombine/dependent-ivs.ll b/llvm/test/Transforms/InstCombine/dependent-ivs.ll index e4a042ff5fe5154..218e619643dfbb9 100644 --- a/llvm/test/Transforms/InstCombine/dependent-ivs.ll +++ b/llvm/test/Transforms/InstCombine/dependent-ivs.ll @@ -167,7 +167,7 @@ define void @int_iv_vector(<2 x i64> %base) { ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV2:%.*]] = add <2 x i64> [[IV]], [[BASE]] ; CHECK-NEXT: call void @use.v2i64(<2 x i64> [[IV2]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: @@ -198,7 +198,7 @@ define void @int_iv_vector_poison_invalid(<2 x i64> %base) { ; CHECK-NEXT: [[IV2:%.*]] = phi <2 x i64> [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ , [[ENTRY]] ] ; CHECK-NEXT: call void @use.v2i64(<2 x i64> [[IV2]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[IV2_NEXT]] = add <2 x i64> [[IV_NEXT]], [[BASE]] ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] @@ -670,7 +670,7 @@ define void @ptr_iv_vector2(<2 x ptr> %base) { ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr i8, <2 x ptr> [[BASE]], <2 x i64> [[IV]] ; CHECK-NEXT: call void @use.v2p0(<2 x ptr> [[IV_PTR]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll index 9610746811a43a6..8dd6d4a2e837120 100644 --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -25,7 +25,7 @@ define i32 @t1(i16 zeroext %x, i32 %y) { define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) { ; CHECK-LABEL: @t1vec( ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1) ; CHECK-NEXT: [[D1:%.*]] = lshr <2 x i32> [[CONV]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[D1]] ; @@ -236,7 +236,7 @@ define i32 @t10(i32 %x, i32 %y) { define <2 x i32> @t11(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t11( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i32> splat (i32 1), [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl nsw <2 x i32> %x, %y @@ -287,7 +287,7 @@ define i32 @t15(i32 %x, i32 %y) { define <2 x i32> @t16(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t16( -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i32> splat (i32 1), [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl nuw <2 x i32> %x, %y @@ -568,7 +568,7 @@ define i5 @udiv_shl_mul_nuw_exact(i5 %x, i5 %y, i5 %z) { define <2 x i4> @udiv_shl_mul_nuw_vec(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @udiv_shl_mul_nuw_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i4> , [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i4> splat (i4 1), [[Z:%.*]] ; CHECK-NEXT: [[D:%.*]] = udiv <2 x i4> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index d5d7ce9b7b26367..33a8e12dfa1a682 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -88,7 +88,7 @@ define i32 @udiv_by_minus1(i32 %A) { define <2 x i64> @udiv_by_minus1_vec(<2 x i64> %x) { ; CHECK-LABEL: @udiv_by_minus1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; @@ -109,7 +109,7 @@ define i32 @udiv_by_sext_all_ones(i1 %x, i32 %y) { define <2 x i32> @udiv_by_sext_all_ones_vec(<2 x i1> %x, <2 x i32> %y) { ; CHECK-LABEL: @udiv_by_sext_all_ones_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV]] ; @@ -152,8 +152,8 @@ define i1 @test7(i32 %A) { define <2 x i1> @test7vec(<2 x i32> %A) { ; CHECK-LABEL: @test7vec( -; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], +; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -20) +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], splat (i32 10) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i32> %A, @@ -174,7 +174,7 @@ define i1 @test8(i8 %A) { define <2 x i1> @test8vec(<2 x i8> %A) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 -11) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, @@ -195,7 +195,7 @@ define i1 @test9(i8 %A) { define <2 x i1> @test9vec(<2 x i8> %A) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, @@ -266,7 +266,7 @@ define i32 @test15(i32 %a, i32 %b) { define <2 x i64> @test16(<2 x i64> %x) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], splat (i64 192) ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; %shr = lshr <2 x i64> %x, @@ -286,7 +286,7 @@ define i32 @test19(i32 %x) { define <2 x i32> @test19vec(<2 x i32> %x) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[A]] ; @@ -309,8 +309,8 @@ define i32 @test20(i32 %x) { define <2 x i32> @test20vec(<2 x i32> %x) { ; CHECK-LABEL: @test20vec( ; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i32> [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X_FR]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X_FR]], <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[A]] ; @@ -441,7 +441,7 @@ define i32 @test32(i32 %a, i32 %b) { define <2 x i64> @test33(<2 x i64> %x) { ; CHECK-LABEL: @test33( -; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], splat (i64 192) ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; %shr = lshr exact <2 x i64> %x, @@ -463,7 +463,7 @@ define i8 @sdiv_negated_dividend_constant_divisor(i8 %x) { define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat( -; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %neg = sub nsw <2 x i8> zeroinitializer, %x @@ -483,7 +483,7 @@ define i8 @sdiv_exact_negated_dividend_constant_divisor(i8 %x) { define <2 x i8> @sdiv_exact_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec_splat( -; CHECK-NEXT: [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %neg = sub nsw <2 x i8> zeroinitializer, %x @@ -504,7 +504,7 @@ define i8 @sdiv_negated_dividend_constant_divisor_smin(i8 %x) { define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat_smin(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -568,8 +568,8 @@ define i32 @test35(i32 %A) { define <2 x i32> @test35vec(<2 x i32> %A) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) +; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %and = and <2 x i32> %A, @@ -591,7 +591,7 @@ define i32 @test36(i32 %A) { define <2 x i32> @test36vec(<2 x i32> %A) { ; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; @@ -656,7 +656,7 @@ define i32 @zap(i8 %x) { define <3 x i32> @shrink_vec(<3 x i8> %x) { ; CHECK-LABEL: @shrink_vec( -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: [[DIV:%.*]] = sext <3 x i8> [[TMP1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[DIV]] ; @@ -667,7 +667,7 @@ define <3 x i32> @shrink_vec(<3 x i8> %x) { define <2 x i32> @zap_vec(<2 x i8> %x) { ; CHECK-LABEL: @zap_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV]] ; @@ -740,8 +740,8 @@ define i8 @div_factor_signed(i8 %x, i8 %y) { define <2 x i8> @div_factor_signed_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @div_factor_signed_vec( ; CHECK-NEXT: [[Y_FR:%.*]] = freeze <2 x i8> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], splat (i8 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i8> [[Y_FR]], <2 x i8> zeroinitializer ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -767,7 +767,7 @@ define i8 @div_factor_unsigned(i8 %x, i8 %y) { define <2 x i8> @div_factor_unsigned_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @div_factor_unsigned_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -874,7 +874,7 @@ define <2 x i64> @test_exact_vec(<2 x i64> %x) { define <2 x i8> @negate_sdiv_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @negate_sdiv_vec_splat( -; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 -42) ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; %div = sdiv <2 x i8> %x, @@ -909,7 +909,7 @@ define <2 x i8> @negate_sdiv_vec_splat_one(<2 x i8> %x) { define <2 x i8> @negate_sdiv_vec_splat_signed_min(<2 x i8> %x) { ; CHECK-LABEL: @negate_sdiv_vec_splat_signed_min( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; @@ -1033,7 +1033,7 @@ define i8 @sdiv_by_int_min(i8 %x) { define <2 x i8> @sdiv_by_int_min_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_int_min_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -1051,7 +1051,7 @@ define <2 x i8> @sdiv_by_int_min_vec_splat_poison(<2 x i8> %x) { define <2 x i8> @sdiv_by_negconst_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_negconst_v2i8( -; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; %div = sdiv <2 x i8> %x, @@ -1071,7 +1071,7 @@ define @sdiv_by_negconst_nxv2i8( %x) { define <2 x i8> @sdiv_by_minSigned_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_minSigned_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; @@ -1151,7 +1151,7 @@ define i32 @sdiv_constant_dividend_select_divisor2(i1 %b, i32 %x) { define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) { ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec( -; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = select i1 %b, <2 x i8> , <2 x i8> @@ -1163,7 +1163,7 @@ define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) { define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) { ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1( -; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = select i1 %b, <2 x i8> , <2 x i8> @@ -1745,7 +1745,7 @@ define i32 @sdiv_distribute_mul_nsw_add_nsw_uses(i32 %x) { define <2 x i6> @udiv_distribute_mul_nuw_add_nuw(<2 x i6> %x) { ; CHECK-LABEL: @udiv_distribute_mul_nuw_add_nuw( -; CHECK-NEXT: [[DIV:%.*]] = add nuw <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = add nuw <2 x i6> [[X:%.*]], splat (i6 5) ; CHECK-NEXT: ret <2 x i6> [[DIV]] ; %mul = mul nuw <2 x i6> %x, diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll index afe5d6af1fcd493..00ee7bf643286f9 100644 --- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll +++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll @@ -179,9 +179,9 @@ define i1 @eq_21_comm_eq2(i32 %x, i32 %y) { define <2x i1> @eq_21_vector(<2x i32> %x, <2x i32> %y) { ; CHECK-LABEL: @eq_21_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> ; CHECK-NEXT: [[C_210:%.*]] = icmp eq <2 x i16> [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret <2 x i1> [[C_210]] @@ -835,9 +835,9 @@ define i1 @ne_21_comm_ne2(i32 %x, i32 %y) { define <2x i1> @ne_21_vector(<2x i32> %x, <2x i32> %y) { ; CHECK-LABEL: @ne_21_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> ; CHECK-NEXT: [[C_210:%.*]] = icmp ne <2 x i16> [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret <2 x i1> [[C_210]] diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll index ccad6f974ffb1eb..c7377ab17d540e1 100644 --- a/llvm/test/Transforms/InstCombine/exact.ll +++ b/llvm/test/Transforms/InstCombine/exact.ll @@ -21,7 +21,7 @@ define i32 @sdiv2(i32 %x) { define <2 x i32> @sdiv2_vec(<2 x i32> %x) { ; CHECK-LABEL: @sdiv2_vec( -; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[Y]] ; %y = sdiv exact <2 x i32> %x, @@ -105,8 +105,8 @@ define i64 @ashr1(i64 %X) { define <2 x i64> @ashr1_vec(<2 x i64> %X) { ; CHECK-LABEL: @ashr1_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[X:%.*]], splat (i64 8) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 2) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %A = shl <2 x i64> %X, @@ -137,7 +137,7 @@ define i1 @ashr_icmp2(i64 %X) { define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @ashr_icmp2_vec( -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], splat (i64 16) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %Y = ashr exact <2 x i64> %X, @@ -163,10 +163,10 @@ define i1 @pr9998(i32 %V) { ; FIXME: Vectors should fold the same way. define <2 x i1> @pr9998vec(<2 x i32> %V) { ; CHECK-LABEL: @pr9998vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[X:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64> -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], splat (i64 7297771788697658747) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %W = shl <2 x i32> %V, @@ -248,7 +248,7 @@ define i1 @sdiv_icmp2(i64 %X) { define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp2_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -268,7 +268,7 @@ define i1 @sdiv_icmp3(i64 %X) { define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp3_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -308,7 +308,7 @@ define i1 @sdiv_icmp5(i64 %X) { define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp5_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -328,7 +328,7 @@ define i1 @sdiv_icmp6(i64 %X) { define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp6_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -402,7 +402,7 @@ define i8 @mul_of_sdiv_fail_ub(i8 %x) { define <2 x i8> @mul_of_sdiv_fail_ub_non_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_of_sdiv_fail_ub_non_splat( ; CHECK-NEXT: [[DIV:%.*]] = sdiv exact <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i8> [[DIV]], +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i8> [[DIV]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[MUL]] ; %div = sdiv exact <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll index d8bd0a4d8159db2..0502540b7f7e92c 100644 --- a/llvm/test/Transforms/InstCombine/exp2-1.ll +++ b/llvm/test/Transforms/InstCombine/exp2-1.ll @@ -309,22 +309,22 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) { define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) { ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF( ; LDEXP32-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; LDEXP32-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; LDEXP32-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; LDEXP32-NEXT: ret <2 x float> [[R]] ; ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF( ; LDEXP16-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16> -; LDEXP16-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> , <2 x i16> [[TMP1]]) +; LDEXP16-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> splat (float 1.000000e+00), <2 x i16> [[TMP1]]) ; LDEXP16-NEXT: ret <2 x float> [[R]] ; ; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF( ; NOLDEXPF-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; NOLDEXPF-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; NOLDEXPF-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; NOLDEXPF-NEXT: ret <2 x float> [[R]] ; ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF( ; NOLDEXP-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; NOLDEXP-NEXT: ret <2 x float> [[R]] ; %s = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll index 969020140cb3391..c2c7ece8483a9e5 100644 --- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll @@ -30,7 +30,7 @@ define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) { ; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> -; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll index 4e49ff159f875d0..9d28cae8f04d639 100644 --- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll @@ -19,7 +19,7 @@ define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl ; CHECK-LABEL: define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[BC]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[BC]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fabs-copysign.ll b/llvm/test/Transforms/InstCombine/fabs-copysign.ll index 438760b76f20391..e1d9f8bf2a4b980 100644 --- a/llvm/test/Transforms/InstCombine/fabs-copysign.ll +++ b/llvm/test/Transforms/InstCombine/fabs-copysign.ll @@ -31,7 +31,7 @@ define double @fabs_copysign_commuted(double %x) { define <4 x double> @fabs_copysign_vec(<4 x double> %x) { ; CHECK-LABEL: @fabs_copysign_vec( -; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> , <4 x double> [[X:%.*]]) +; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]]) ; CHECK-NEXT: ret <4 x double> [[DIV]] ; %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) @@ -41,7 +41,7 @@ define <4 x double> @fabs_copysign_vec(<4 x double> %x) { define <4 x double> @fabs_copysign_vec_commuted(<4 x double> %x) { ; CHECK-LABEL: @fabs_copysign_vec_commuted( -; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> , <4 x double> [[X:%.*]]) +; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]]) ; CHECK-NEXT: ret <4 x double> [[DIV]] ; %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) diff --git a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll index a1988e5d1638d78..dd8d0aed3210e1b 100644 --- a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll +++ b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll @@ -137,7 +137,7 @@ define float @fabs_fneg_no_fabs(float %x) { define <2 x float> @fabs_fneg_splat_v2f32(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @fabs_fneg_splat_v2f32( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 2.000000e+00) ; %neg = fneg <2 x float> %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %neg) diff --git a/llvm/test/Transforms/InstCombine/fadd.ll b/llvm/test/Transforms/InstCombine/fadd.ll index 840ccaef1086ab1..36c387969d05d62 100644 --- a/llvm/test/Transforms/InstCombine/fadd.ll +++ b/llvm/test/Transforms/InstCombine/fadd.ll @@ -478,7 +478,7 @@ define float @fadd_fmul_common_op(float %x) { define <2 x float> @fadd_fmul_common_op_vec(<2 x float> %x) { ; CHECK-LABEL: @fadd_fmul_common_op_vec( -; CHECK-NEXT: [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], splat (float 4.300000e+01) ; CHECK-NEXT: ret <2 x float> [[A]] ; %m = fmul reassoc nsz <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 32f136d53fab4b6..5a80cf56712035a 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -587,7 +587,7 @@ define float @fdiv2(float %x) { define <2 x float> @fdiv2_vec(<2 x float> %x) { ; CHECK-LABEL: @fdiv2_vec( -; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[DIV1]] ; %mul = fmul <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll index 54dbb09cb8fd31e..5a181ead4016ca9 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll @@ -186,7 +186,7 @@ define <2 x i1> @test_and_olt_poison(<2 x float> %x) { ; CHECK-LABEL: define <2 x i1> @test_and_olt_poison( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], +; CHECK-NEXT: [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], splat (float 0x3C00000000000000) ; CHECK-NEXT: ret <2 x i1> [[COND]] ; %cmp1 = fcmp olt <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll index 0d45baddcb2fc77..119cffd73c662c2 100644 --- a/llvm/test/Transforms/InstCombine/fcmp.ll +++ b/llvm/test/Transforms/InstCombine/fcmp.ll @@ -32,7 +32,7 @@ define i1 @fpext_constant(float %a) { define <2 x i1> @fpext_constant_vec_splat(<2 x half> %a) { ; CHECK-LABEL: @fpext_constant_vec_splat( -; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], splat (half 0xH5140) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %ext = fpext <2 x half> %a to <2 x double> @@ -780,7 +780,7 @@ define i1 @lossy_une(half %x) { define <2 x i1> @lossy_ogt(<2 x float> %x) { ; CHECK-LABEL: @lossy_ogt( ; CHECK-NEXT: [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double> -; CHECK-NEXT: [[R:%.*]] = fcmp ogt <2 x double> [[E]], +; CHECK-NEXT: [[R:%.*]] = fcmp ogt <2 x double> [[E]], splat (double 1.000000e-01) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %e = fpext <2 x float> %x to <2 x double> @@ -826,7 +826,7 @@ define i1 @lossy_ole(half %x) { define <2 x i1> @lossy_ugt(<2 x float> %x) { ; CHECK-LABEL: @lossy_ugt( ; CHECK-NEXT: [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double> -; CHECK-NEXT: [[R:%.*]] = fcmp ugt <2 x double> [[E]], +; CHECK-NEXT: [[R:%.*]] = fcmp ugt <2 x double> [[E]], splat (double 1.000000e-01) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %e = fpext <2 x float> %x to <2 x double> @@ -1222,7 +1222,7 @@ define i1 @bitcast_eq0(i32 %x) { define <2 x i1> @bitcast_ne0(<2 x i32> %x) { ; CHECK-LABEL: @bitcast_ne0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 12d6e6463de657b..06c78a8c6206dcd 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -78,7 +78,7 @@ define float @not_exact_but_allow_recip_but_denorm(float %x) { define <2 x float> @exact_inverse_splat(<2 x float> %x) { ; CHECK-LABEL: @exact_inverse_splat( -; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], splat (float 2.500000e-01) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv <2 x float> %x, @@ -98,7 +98,7 @@ define @exact_inverse_scalable_splat( % define <2 x float> @not_exact_but_allow_recip_splat(<2 x float> %x) { ; CHECK-LABEL: @not_exact_but_allow_recip_splat( -; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], splat (float 0x3FD5555560000000) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv arcp <2 x float> %x, @@ -116,7 +116,7 @@ define <2 x float> @exact_inverse_vec(<2 x float> %x) { define <2 x float> @not_exact_inverse_splat(<2 x float> %x) { ; CHECK-LABEL: @not_exact_inverse_splat( -; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv <2 x float> %x, @@ -465,7 +465,7 @@ define float @div_factor_too_strict(float %x, float %y) { define <2 x float> @div_factor_commute(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @div_factor_commute( -; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> , [[Y:%.*]] +; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> splat (float 1.000000e+00), [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[D]] ; %m = fmul <2 x float> %y, %x @@ -959,7 +959,7 @@ define float @fdiv_nnan_zero_f32(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan <2 x float> %x, zeroinitializer @@ -977,7 +977,7 @@ define float @fdiv_nnan_zero_f32_fmf(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf( -; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer @@ -1031,7 +1031,7 @@ define double @test_negative_zero(double %X) { define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) { ; CHECK-LABEL: @test_positive_zero_vector_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan nsz <2 x double> %X, @@ -1040,7 +1040,7 @@ define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) { define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) { ; CHECK-LABEL: @test_negative_zero_vector_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan nsz <2 x double> %X, @@ -1049,7 +1049,7 @@ define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) { define <2 x double> @test_positive_zero_vector(<2 x double> %X) { ; CHECK-LABEL: @test_positive_zero_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan <2 x double> %X, diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll index 4590bbb7abaeb80..ae0067d41426cf3 100644 --- a/llvm/test/Transforms/InstCombine/fma.ll +++ b/llvm/test/Transforms/InstCombine/fma.ll @@ -587,7 +587,7 @@ define <2 x double> @fma_const_fmul_one2(<2 x double> %b) { define <2 x double> @fma_nan_and_const_0(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -595,7 +595,7 @@ define <2 x double> @fma_nan_and_const_0(<2 x double> %b) { define <2 x double> @fma_nan_and_const_1(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -603,7 +603,7 @@ define <2 x double> @fma_nan_and_const_1(<2 x double> %b) { define <2 x double> @fma_nan_and_const_2(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> ) ret <2 x double> %res @@ -611,7 +611,7 @@ define <2 x double> @fma_nan_and_const_2(<2 x double> %b) { define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -619,7 +619,7 @@ define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -627,7 +627,7 @@ define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_undef_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res @@ -663,14 +663,14 @@ define <2 x double> @fma_partial_undef_2(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_nan_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res } define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -678,7 +678,7 @@ define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_nan_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res @@ -695,7 +695,7 @@ define <2 x double> @fmuladd_const_fmul(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -703,7 +703,7 @@ define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -711,7 +711,7 @@ define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> ) ret <2 x double> %res @@ -719,7 +719,7 @@ define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) { define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_nan_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -727,7 +727,7 @@ define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_nan_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -735,7 +735,7 @@ define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -743,7 +743,7 @@ define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -751,7 +751,7 @@ define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res diff --git a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll index 716781564ca70fa..3c598f91fad410b 100644 --- a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll @@ -10,7 +10,7 @@ define void @test8(ptr %inout, i1 %c1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2 diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll index 72ac2f18f113a4c..2166db8f5c4e59e 100644 --- a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll @@ -118,7 +118,7 @@ define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x flo ; CHECK-LABEL: @x_add_y_rsqrt_reassociate_extra_use( ; CHECK-NEXT: [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]]) -; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> , [[SQRT]] +; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[SQRT]] ; CHECK-NEXT: [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]] ; CHECK-NEXT: store <2 x float> [[RSQRT]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <2 x float> [[RES]] diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index 4554b4ed8844de8..51b70ef7e98004b 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -377,7 +377,7 @@ define void @test8(ptr %inout, i1 %c1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2 @@ -821,8 +821,8 @@ define float @fmul_fadd_distribute(float %x) { define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) { ; CHECK-LABEL: @fmul_fadd_distribute_vec( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 6.000000e+03) +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], splat (float 1.200000e+07) ; CHECK-NEXT: ret <2 x float> [[T3]] ; %t1 = fadd reassoc <2 x float> , %x @@ -1164,7 +1164,7 @@ define float @negate_if_false(float %x, i1 %cond) { define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fneg ninf <2 x double> [[X]] ; CHECK-NEXT: [[R:%.*]] = select ninf i1 [[COND:%.*]], <2 x double> [[TMP1]], <2 x double> [[X]] ; CHECK-NEXT: ret <2 x double> [[R]] @@ -1207,8 +1207,8 @@ define float @negate_if_true_extra_use(float %x, i1 %cond) { define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x double> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll index e3067b0d024614a..f8d88b4f238f27d 100644 --- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-as-int.ll @@ -19,7 +19,7 @@ define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl ; CHECK-LABEL: define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[BC]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[BC]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll index 8c3e6958fe083e3..8b245bdd792993b 100644 --- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll @@ -17,7 +17,7 @@ define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimpli ; CHECK-LABEL: define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[OR]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll index 3461b19d0d70021..f28262b2a77e04d 100644 --- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll +++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll @@ -47,7 +47,7 @@ define <4 x float> @h1(i1 %A, <4 x i32> %B) { ; CHECK-LABEL: @h1( ; CHECK-NEXT: EntryBlock: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> -; CHECK-NEXT: [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> , <4 x float> [[TMP0]] +; CHECK-NEXT: [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> splat (float 0x36A0000000000000), <4 x float> [[TMP0]] ; CHECK-NEXT: ret <4 x float> [[BC]] ; EntryBlock: diff --git a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll index 6d5fde364c23531..0ed9927325d9293 100644 --- a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll +++ b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll @@ -144,7 +144,7 @@ define <2 x i1> @fold_cmp_ne_ctpop_c(<2 x i8> %x) { define <2 x i1> @fold_cmp_ne_ctpop_var_fail(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @fold_cmp_ne_ctpop_var_fail( -; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[CNT]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -214,7 +214,7 @@ define <2 x i1> @fold_cmp_ugt_ctpop_c(<2 x i8> %x) { define <2 x i1> @fold_cmp_ugt_ctpop_c_out_of_range_fail(<2 x i8> %x) { ; CHECK-LABEL: @fold_cmp_ugt_ctpop_c_out_of_range_fail( -; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]) ; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[CNT]], ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll index caf38c676e20d79..ff6d9aa4ea522ed 100644 --- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll +++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll @@ -63,7 +63,7 @@ define float @ldexp_by_5_if_0_oeq_zero_f32(float %x) { define <2 x float> @ldexp_by_5_if_0_oeq_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @ldexp_by_5_if_0_oeq_zero_v2f32( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq <2 x float> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = select <2 x i1> [[X_IS_ZERO]], <2 x float> [[SCALED_X]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[SCALED_IF_DENORMAL]] ; diff --git a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll index 5567d7d5e1fca93..0c7ca0c4fd00599 100644 --- a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll +++ b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll @@ -59,7 +59,7 @@ define i8 @fold_select_trunc_negative(i8 %x, i8 %y) { define <2 x i8> @fold_select_trunc_vector(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @fold_select_trunc_vector( ; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw <2 x i8> [[X:%.*]] to <2 x i1> -; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> splat (i8 1), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %trunc = trunc nuw <2 x i8> %x to <2 x i1> diff --git a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll index a5c7cb3306ed08f..be02eb6dd334633 100644 --- a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll +++ b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll @@ -38,8 +38,8 @@ define i1 @pow2_or_zero_is_negative_commute(i8 %A) { define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) { ; CHECK-LABEL: @pow2_or_zero_is_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: call void @use_i1_vec(<2 x i1> [[CMP_2]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -53,8 +53,8 @@ define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) { define <2 x i1> @pow2_or_zero_is_negative_vec_commute(<2 x i8> %A) { ; CHECK-LABEL: @pow2_or_zero_is_negative_vec_commute( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = mul <2 x i8> , %A ; thwart complexity-based canonicalization @@ -94,8 +94,8 @@ define i1 @pow2_or_zero_is_not_negative_commute(i8 %A) { define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) { ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -128) +; CHECK-NEXT: [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: call void @use_i1_vec(<2 x i1> [[CMP_2]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -109,8 +109,8 @@ define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) { define <2 x i1> @pow2_or_zero_is_not_negative_vec_commute(<2 x i8> %A) { ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec_commute( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = mul <2 x i8> , %A ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll index af580ba57513c2e..3bd341ffafb58b1 100644 --- a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll +++ b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll @@ -41,7 +41,7 @@ define i8 @p0_scalar_not_truly_negatable(i8 %x, i8 %y) { define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p1_vector_splat( -; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; @@ -52,7 +52,7 @@ define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @p2_vector_poison(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p2_vector_poison( -; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; @@ -109,7 +109,7 @@ define i32 @n5_is_not_not(i32 %x, i32 %y) { define <2 x i32> @n5_is_not_not_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @n5_is_not_not_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[T1:%.*]] = sub <2 x i32> [[Y:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i32> [[T1]] ; diff --git a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll index 4034759df060489..66970a9d48ddf1d 100644 --- a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll +++ b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll @@ -253,7 +253,7 @@ define <2 x i1> @f32_fcnan_fcinf_vec(<2 x float> %a) { ; CHECK-LABEL: define <2 x i1> @f32_fcnan_fcinf_vec( ; CHECK-SAME: <2 x float> [[A:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i32 = bitcast <2 x float> %a to <2 x i32> @@ -278,7 +278,7 @@ define <2 x i1> @f32_fcinf_vec(<2 x float> %a) { ; CHECK-LABEL: define <2 x i1> @f32_fcinf_vec( ; CHECK-SAME: <2 x float> [[A:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i32 = bitcast <2 x float> %a to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index f1fba6cb272f9f3..79d3360377e5065 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -62,7 +62,7 @@ define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) { define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_mask_simplify2( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], +; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], splat (i31 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]]) ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -138,7 +138,7 @@ define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_set_but_not_demanded_vec( -; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], +; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], splat (i31 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]]) ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -223,7 +223,7 @@ define i33 @fshr_op1_zero(i33 %x) { define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) { ; CHECK-LABEL: @fshl_op0_zero_splat_vec( -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], splat (i31 24) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> ) @@ -232,7 +232,7 @@ define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) { define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) { ; CHECK-LABEL: @fshl_op1_undef_splat_vec( -; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], splat (i31 7) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> ) @@ -241,7 +241,7 @@ define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) { define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @fshr_op0_undef_splat_vec( -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> ) @@ -250,7 +250,7 @@ define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) { define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @fshr_op1_zero_splat_vec( -; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 25) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> ) @@ -341,7 +341,7 @@ define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) { define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) { ; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat( -; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24) ; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -466,7 +466,7 @@ define i33 @rotr_common_demanded(i33 %a0) { define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) { ; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat( -; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24) ; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -486,7 +486,7 @@ define i32 @rotl_constant_shift_amount(i32 %x) { define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) { ; CHECK-LABEL: @rotl_constant_shift_amount_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> splat (i31 1)) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> ) @@ -852,7 +852,7 @@ entry: define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) { ; CHECK-LABEL: @fshr_mask_args_same_vector( -; CHECK-NEXT: [[T3:%.*]] = shl <2 x i31> [[A:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <2 x i31> [[A:%.*]], splat (i31 10) ; CHECK-NEXT: ret <2 x i31> [[T3]] ; %t1 = and <2 x i31> %a, @@ -864,7 +864,7 @@ define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) { define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @fshr_mask_args_same_vector2( ; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T3:%.*]] = lshr exact <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = lshr exact <2 x i32> [[T1]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %a, @@ -875,7 +875,7 @@ define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) { define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) { ; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable( -; CHECK-NEXT: [[T1:%.*]] = and <2 x i31> [[A:%.*]], +; CHECK-NEXT: [[T1:%.*]] = and <2 x i31> [[A:%.*]], splat (i31 1000) ; CHECK-NEXT: [[T3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[T1]], <2 x i31> ) ; CHECK-NEXT: ret <2 x i31> [[T3]] ; diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index fa0d59b2269983b..0e5f0469264c7ce 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -34,7 +34,7 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) { define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -45,7 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -56,7 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -69,7 +69,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -80,7 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -91,7 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) define <2 x i17> @fshr_v2i17_constant_splat_poison1(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -481,11 +481,11 @@ define i32 @fshl_concat_unknown_source(i32 %zext.x, i32 %zext.y, ptr %addr) { define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) { ; CHECK-LABEL: @fshl_concat_vector( ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], +; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], splat (i32 24) ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32> ; CHECK-NEXT: [[XY:%.*]] = or disjoint <2 x i32> [[SLX]], [[ZEXT_Y]] ; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4 -; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> ) +; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x i32> [[YX]] ; %zext.x = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 1cb7cf99bea325d..5f22a354570157b 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -97,8 +97,8 @@ define void @PR37005(ptr %base, ptr %in) { ; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48 ; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> ; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64> -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], -; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14) +; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) @@ -159,8 +159,8 @@ define void @PR37005_3(<2 x ptr> %base, ptr %in) { ; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48 ; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> ; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64> -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], -; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14) +; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x ptr> [[BASE:%.*]], <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) @@ -189,7 +189,7 @@ define void @PR51485(<2 x i64> %v) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], splat (i64 7) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, ptr @PR51485, <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) diff --git a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll index e8eaf4e24f7e422..c1c11c1acc7bbc8 100644 --- a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll +++ b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll @@ -99,7 +99,7 @@ define i1 @test5(ptr %x, ptr %y) { define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i32> zeroinitializer, <2 x i32> , <2 x i32> %X @@ -111,7 +111,7 @@ define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind { ; Same as above, but indices scalarized. define <2 x i1> @test6b(<2 x i32> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test6b( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, i32 0, i32 1, <2 x i32> %X diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll index 4d20323b7896745..d8a65b69aceff74 100644 --- a/llvm/test/Transforms/InstCombine/gep-vector.ll +++ b/llvm/test/Transforms/InstCombine/gep-vector.ll @@ -5,7 +5,7 @@ define <2 x ptr> @vectorindex1() { ; CHECK-LABEL: @vectorindex1( -; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> splat (i64 8192)) ; %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, <2 x i64> , i64 8192 ret <2 x ptr> %1 @@ -13,7 +13,7 @@ define <2 x ptr> @vectorindex1() { define <2 x ptr> @vectorindex2() { ; CHECK-LABEL: @vectorindex2( -; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> splat (i64 1), <2 x i64> ) ; %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, i64 1, <2 x i64> ret <2 x ptr> %1 diff --git a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll index 40caa57891369ff..81a148b745906ef 100644 --- a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll +++ b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll @@ -32,8 +32,8 @@ define i16 @t1(i16 %x) { ; Vectors define <2 x i8> @t2_vec(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <2 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <2 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %bitmask = shl <2 x i8> , %x @@ -43,8 +43,8 @@ define <2 x i8> @t2_vec(<2 x i8> %x) { } define <3 x i8> @t3_vec_poison0(<3 x i8> %x) { ; CHECK-LABEL: @t3_vec_poison0( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x @@ -54,8 +54,8 @@ define <3 x i8> @t3_vec_poison0(<3 x i8> %x) { } define <3 x i8> @t4_vec_poison1(<3 x i8> %x) { ; CHECK-LABEL: @t4_vec_poison1( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x @@ -65,8 +65,8 @@ define <3 x i8> @t4_vec_poison1(<3 x i8> %x) { } define <3 x i8> @t5_vec_poison2(<3 x i8> %x) { ; CHECK-LABEL: @t5_vec_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 6a5f77e8cf6e08a..8f28049cf5f5815 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -222,7 +222,7 @@ define i1 @test13(i64 %X, ptr %P) { define <2 x i1> @test13_vector(<2 x i64> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test13_vector( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i64> zeroinitializer, <2 x i32> , <2 x i64> %X @@ -250,7 +250,7 @@ define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], ; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -325,7 +325,7 @@ define i1 @test13_as1(i16 %X, ptr addrspace(1) %P) { define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x ptr addrspace(1)> %P) { ; CHECK-LABEL: @test13_vector_as1( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], splat (i16 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr addrspace(1)> %P, <2 x i16> , <2 x i32> , <2 x i16> %X diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll index 3ebab115f654391..5786c30fc7423a6 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll @@ -48,7 +48,7 @@ define i32 @t3_exact(i64 %x) { define <2 x i32> @t4(<2 x i64> %x) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63) ; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw <2 x i64> [[T0_NEG]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T1_NEG]] ; diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll index 53a7fb4461c57e3..b4bad9e6bd8e1f9 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll @@ -40,7 +40,7 @@ define i64 @t3_exact(i64 %x) { define <2 x i64> @t4(<2 x i64> %x) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[T0_NEG]] ; %t0 = lshr <2 x i64> %x, diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll index ab37becf0e31179..ff03b3d5b678c6e 100644 --- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll +++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll @@ -26,7 +26,7 @@ define i8 @t0(i8 %x) { define <2 x i8> @t1_vec(<2 x i8> %x) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -43) ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] ; diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll index 6049997db4d1ae3..d4aa8b5dbf50556 100644 --- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll +++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll @@ -44,7 +44,7 @@ define i8 @t1_commutative(i8 %y) { define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] @@ -57,7 +57,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t3_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] diff --git a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll index 2217666f0f49a29..f4e72c073e8a5c4 100644 --- a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll +++ b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll @@ -34,7 +34,7 @@ define i8 @t1(i8 %x, i8 %y) { define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t2_vec( ; CHECK-NEXT: [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], +; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[ASHR]] ; %not_x = xor <2 x i8> %x, @@ -45,7 +45,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t3_vec_poison( ; CHECK-NEXT: [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], +; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[ASHR]] ; %not_x = xor <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll index f92b10b0ccb3714..39a76ae11ceef3a 100644 --- a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll +++ b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll @@ -20,7 +20,7 @@ define i8 @t0_scalar(i8 %x, i8 %y) { define <2 x i8> @t1_splatvec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_splatvec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %i0 = xor <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 0c141d4b8e73aaf..dd8e9c1a45ea108 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -514,7 +514,7 @@ bb: define <2 x i1> @cvt_icmp_2_sext_plus_zext_ne_vec(<2 x i1> %arg, <2 x i1> %arg1) { ; CHECK-LABEL: @cvt_icmp_2_sext_plus_zext_ne_vec( ; CHECK-NEXT: bb: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; bb: %i = sext <2 x i1> %arg to <2 x i32> @@ -1740,7 +1740,7 @@ define i1 @test1(i32 %a) { define <2 x i1> @test1vec(<2 x i32> %a) { ; CHECK-LABEL: @test1vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1760,7 +1760,7 @@ define i1 @test2(i32 %a) { define <2 x i1> @test2vec(<2 x i32> %a) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = sub <2 x i32> %a, @@ -1780,7 +1780,7 @@ define i1 @test3(i32 %a) { define <2 x i1> @test3vec(<2 x i32> %a) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 2147483643) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1818,7 +1818,7 @@ define { i32, i1 } @test4multiuse(i32 %a) { define <2 x i1> @test4vec(<2 x i32> %a) { ; CHECK-LABEL: @test4vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1841,7 +1841,7 @@ define i1 @nsw_slt1(i8 %a) { define <2 x i1> @nsw_slt1_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_slt1_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1864,7 +1864,7 @@ define i1 @nsw_slt2(i8 %a) { define <2 x i1> @nsw_slt2_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_slt2_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1913,7 +1913,7 @@ define i1 @nsw_sgt1(i8 %a) { define <2 x i1> @nsw_sgt1_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_sgt1_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1933,7 +1933,7 @@ define i1 @nsw_sgt2(i8 %a) { define <2 x i1> @nsw_sgt2_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_sgt2_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -126) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1958,7 +1958,7 @@ define i1 @slt_zero_add_nsw(i32 %a) { define <2 x i1> @slt_zero_add_nsw_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @slt_zero_add_nsw_splat_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add nsw <2 x i8> %a, @@ -2117,7 +2117,7 @@ define <2 x i1> @op_ugt_sum_vec_commute2(<2 x i8> %p1, <2 x i8> %p2) { ; CHECK-LABEL: @op_ugt_sum_vec_commute2( ; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[P1:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> , [[P2:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -2149,7 +2149,7 @@ define <2 x i1> @sum_ult_op_vec_commute1(<2 x i8> %p1, <2 x i8> %p2) { ; CHECK-LABEL: @sum_ult_op_vec_commute1( ; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[P1:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> , [[P2:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -2367,7 +2367,7 @@ define <2 x i1> @icmp_eq_add_non_splat(<2 x i32> %a) { define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_add_undef2( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2378,7 +2378,7 @@ define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) { define <2 x i1> @icmp_eq_add_non_splat2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_add_non_splat2( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2496,7 +2496,7 @@ define i1 @ugt_offset_use(i32 %a) { define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @ugt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp slt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp slt <2 x i5> [[A:%.*]], splat (i5 -9) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2552,7 +2552,7 @@ define i1 @ult_offset_use(i32 %a) { define <2 x i1> @ult_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @ult_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp sgt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp sgt <2 x i5> [[A:%.*]], splat (i5 -10) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2608,7 +2608,7 @@ define i1 @sgt_offset_use(i32 %a) { define <2 x i1> @sgt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @sgt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp ult <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ult <2 x i5> [[A:%.*]], splat (i5 7) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2664,7 +2664,7 @@ define i1 @slt_offset_use(i32 %a) { define <2 x i1> @slt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @slt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[A:%.*]], splat (i5 6) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2976,8 +2976,8 @@ define i1 @icmp_dec_nonzero(i16 %x) { define <2 x i1> @icmp_dec_nonzero_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_dec_nonzero_vec( -; CHECK-NEXT: [[O:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[I:%.*]] = add nsw <2 x i32> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[I:%.*]] = add nsw <2 x i32> [[O]], splat (i32 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[I]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -3068,8 +3068,8 @@ define i1 @ult_add_C2_neg_C_pow2(i8 %x) { define <2 x i1> @ult_add_C2_pow2_C_neg_vec(<2 x i8> %x) { ; CHECK-LABEL: @ult_add_C2_pow2_C_neg_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -32) +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll index d092363309fec02..78f1bc7d7379d47 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -18,7 +18,7 @@ define i32 @icmp_eq_and_pow2_shl1(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl1_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -44,7 +44,7 @@ define i32 @icmp_ne_and_pow2_shl1(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl1_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -70,7 +70,7 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -96,7 +96,7 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -162,7 +162,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl1(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_minus1_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 3) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -188,7 +188,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl1(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_minus1_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl1_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -214,7 +214,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl_pow2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl_pow2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -240,7 +240,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl_pow2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl_pow2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -293,7 +293,7 @@ define i32 @icmp_eq_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_eq_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 3) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -319,7 +319,7 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -356,7 +356,7 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -393,7 +393,7 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -686,7 +686,7 @@ define i1 @test_const_shr_and_2_ne_0_negative(i32 %b) { define <8 x i1> @test_const_shr_and_1_ne_0_v8i8_splat_negative(<8 x i8> %b) { ; CHECK-LABEL: @test_const_shr_and_1_ne_0_v8i8_splat_negative( -; CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i8> , [[B:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i8> splat (i8 42), [[B:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = trunc <8 x i8> [[SHR]] to <8 x i1> ; CHECK-NEXT: ret <8 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index f667e1aa105d04a..264e951b32e49b9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -16,8 +16,8 @@ define i1 @is_rem2_neg_i8(i8 %x) { define <2 x i1> @is_rem2_pos_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @is_rem2_pos_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -127) +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %s = srem <2 x i8> %x, @@ -210,8 +210,8 @@ define i1 @udiv_eq_umax(i8 %x, i8 %y) { define <2 x i1> @udiv_ne_umax(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @udiv_ne_umax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], splat (i5 1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -299,8 +299,8 @@ define i1 @sdiv_eq_smin(i8 %x, i8 %y) { define <2 x i1> @sdiv_ne_smin(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @sdiv_ne_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -16) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], splat (i5 1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -434,7 +434,7 @@ define i1 @lshr_x_by_const_cmp_x(i32 %x) { define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) { ; CHECK-LABEL: @lshr_by_const_cmp_sle_value( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = lshr <4 x i32> %x, @@ -444,7 +444,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) { define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) { ; CHECK-LABEL: @lshr_by_const_cmp_sle_value_non_splat( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = lshr <4 x i32> %x, @@ -455,7 +455,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) { define <4 x i1> @ashr_by_const_cmp_sge_value_non_splat(<4 x i32> %x) { ; CHECK-LABEL: @ashr_by_const_cmp_sge_value_non_splat( -; CHECK-NEXT: [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = ashr <4 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-fsh.ll b/llvm/test/Transforms/InstCombine/icmp-fsh.ll index 8154f312dc5d401..cafc171301f7d35 100644 --- a/llvm/test/Transforms/InstCombine/icmp-fsh.ll +++ b/llvm/test/Transforms/InstCombine/icmp-fsh.ll @@ -45,7 +45,7 @@ define i1 @rotl_eq_n1(i8 %x, i8 %y) { define <2 x i1> @rotl_ne_n1(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @rotl_ne_n1( -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rot = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5>%x, <2 x i5> %x, <2 x i5> %y) diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll index 4690ead483a5bae..50feb51092fd9ea 100644 --- a/llvm/test/Transforms/InstCombine/icmp-logical.ll +++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll @@ -17,7 +17,7 @@ define i1 @masked_and_notallzeroes(i32 %A) { define <2 x i1> @masked_and_notallzeroes_splat(<2 x i32> %A) { ; CHECK-LABEL: @masked_and_notallzeroes_splat( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: [[TST1:%.*]] = icmp ne <2 x i32> [[MASK1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TST1]] ; @@ -371,7 +371,7 @@ define i1 @fold_mask_cmps_to_true_logical(i32 %x) { define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) { ; CHECK-LABEL: @nomask_splat_and_B_allones( -; CHECK-NEXT: [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -268435457) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %tst1 = icmp slt <2 x i32> %A, @@ -383,8 +383,8 @@ define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) { define <2 x i1> @nomask_splat_and_B_mixed(<2 x i32> %A) { ; CHECK-LABEL: @nomask_splat_and_B_mixed( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -268435456) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1879048192) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %tst1 = icmp sgt <2 x i32> %A, @@ -476,8 +476,8 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { define <2 x i1> @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(<2 x i32> %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15) +; CHECK-NEXT: [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 9) ; CHECK-NEXT: ret <2 x i1> [[T5]] ; %t1 = and <2 x i32> %x, @@ -1778,8 +1778,8 @@ define i1 @masked_icmps_bmask_notmixed_or(i32 %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %mask1 = and <2 x i8> %A, ; 0x0f @@ -1792,9 +1792,9 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison1( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) ; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], -; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], +; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 -13) ; CHECK-NEXT: [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]] ; CHECK-NEXT: ret <2 x i1> [[RES]] ; @@ -1808,8 +1808,8 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison2(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison2( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) +; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3) ; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], ; CHECK-NEXT: [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]] ; CHECK-NEXT: ret <2 x i1> [[RES]] diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll index 7e7f087ca7112c1..7fa75184c1cf3bc 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll @@ -158,7 +158,7 @@ define i32 @pr40493_neg3(i32 %area) { define <4 x i1> @pr40493_vec1(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec1( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], splat (i32 1) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -171,7 +171,7 @@ define <4 x i1> @pr40493_vec1(<4 x i32> %area) { define <4 x i1> @pr40493_vec2(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec2( ; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], -; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], +; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], splat (i32 4) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -183,7 +183,7 @@ define <4 x i1> @pr40493_vec2(<4 x i32> %area) { define <4 x i1> @pr40493_vec3(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec3( -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], splat (i32 12) ; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index c4543c9deef3883..c9f9b6d809e8a5f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -257,7 +257,7 @@ define i1 @eq_nsw_rem_zero(i8 %x) { define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero( -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -6) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nsw <2 x i8> %x, @@ -270,7 +270,7 @@ define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) { define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero_undef1( ; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], splat (i8 -30) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nsw <2 x i8> %x, @@ -282,7 +282,7 @@ define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) { define <2 x i1> @ne_nsw_rem_zero_undef2(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero_undef2( -; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i1> [[B]] ; @@ -328,7 +328,7 @@ define i1 @ne_nsw_rem_nz(i8 %x) { define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nuw <2 x i8> %x, @@ -341,7 +341,7 @@ define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) { define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero_undef1( ; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 20) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nuw <2 x i8> %x, @@ -353,7 +353,7 @@ define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) { define <2 x i1> @eq_nuw_rem_zero_undef2(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero_undef2( -; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i1> [[B]] ; @@ -783,7 +783,7 @@ define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @eq_mul_constants_with_tz_splat( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1073741823) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1042,7 +1042,7 @@ define i1 @mul_evenC_ne(i8 %v) { define <2 x i1> @mul_oddC_ne_vec(<2 x i8> %v) { ; CHECK-LABEL: @mul_oddC_ne_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %mul = mul <2 x i8> %v, @@ -1110,7 +1110,7 @@ define i1 @mul_xy_z_assumeodd_eq(i8 %x, i8 %y, i8 %z) { define <2 x i1> @reused_mul_nsw_xy_z_setnonzero_vec_ne(<2 x i8> %x, <2 x i8> %y, <2 x i8> %zi) { ; CHECK-LABEL: @reused_mul_nsw_xy_z_setnonzero_vec_ne( -; CHECK-NEXT: [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], splat (i8 4) ; CHECK-NEXT: [[MULY:%.*]] = mul nsw <2 x i8> [[Y:%.*]], [[Z]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[Y]], [[X:%.*]] ; CHECK-NEXT: call void @usev2xi8(<2 x i8> [[MULY]]) diff --git a/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll b/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll index 439c1d6af6654cd..9afe2a93c56f001 100644 --- a/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll @@ -7,7 +7,7 @@ define <2 x i1> @eq_t_not(<2 x i1> %a) { ; CHECK-LABEL: @eq_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -35,7 +35,7 @@ define <2 x i1> @ne_t_not(<2 x i1> %a) { define <2 x i1> @ne_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ne_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -54,7 +54,7 @@ define <2 x i1> @ugt_t_not(<2 x i1> %a) { define <2 x i1> @ugt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ugt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -109,7 +109,7 @@ define <2 x i1> @slt_t_not(<2 x i1> %a) { define <2 x i1> @slt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @slt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -119,7 +119,7 @@ define <2 x i1> @slt_f_not(<2 x i1> %a) { define <2 x i1> @uge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -129,7 +129,7 @@ define <2 x i1> @uge_t_not(<2 x i1> %a) { define <2 x i1> @uge_f_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp uge <2 x i1> %not, @@ -138,7 +138,7 @@ define <2 x i1> @uge_f_not(<2 x i1> %a) { define <2 x i1> @ule_t_not(<2 x i1> %a) { ; CHECK-LABEL: @ule_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp ule <2 x i1> %not, @@ -156,7 +156,7 @@ define <2 x i1> @ule_f_not(<2 x i1> %a) { define <2 x i1> @sge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sge <2 x i1> %not, @@ -174,7 +174,7 @@ define <2 x i1> @sge_f_not(<2 x i1> %a) { define <2 x i1> @sle_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -184,7 +184,7 @@ define <2 x i1> @sle_t_not(<2 x i1> %a) { define <2 x i1> @sle_f_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sle <2 x i1> %not, diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll index 09c9c1ebc831591..0bcbc3bcb050b00 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll @@ -149,7 +149,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) { define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) { ; CHECK-LABEL: @icmp_sle_x_posy( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = and <2 x i8> %yy, @@ -187,7 +187,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) { define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) { ; CHECK-LABEL: @icmp_sgt_negx_y( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x = or <2 x i8> %xx, diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll index 93eeab4732185b5..993325f6ff0b097 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll @@ -31,7 +31,7 @@ define <2 x i1> @or_ule(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @or_slt_pos(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) { ; CHECK-LABEL: @or_slt_pos( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127) ; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -192,7 +192,7 @@ define i1 @or_slt_intmin(i8 %x) { define <2 x i1> @or_slt_intmin_2(<2 x i8> %xx, <2 x i8> %z) { ; CHECK-LABEL: @or_slt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -268,7 +268,7 @@ pos: define <2 x i1> @or_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %z) { ; CHECK-LABEL: @or_sgt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll index a4e7acbca930dc4..0635bb31c44fcfb 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll @@ -384,8 +384,8 @@ define i1 @xor_ult(i8 %x) { define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @xor_sgt( -; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], +; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 31) +; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 64) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -399,8 +399,8 @@ define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @xor_sgt_fail_no_known_msb(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @xor_sgt_fail_no_known_msb( -; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], +; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 55) +; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 8) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -426,7 +426,7 @@ define i1 @xor_slt_2(i8 %x, i8 %y, i8 %z) { define <2 x i1> @xor_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) { ; CHECK-LABEL: @xor_sgt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], splat (i8 -128) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XOR]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll index 56115f6d7d34144..6e054bd8d8839e6 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or.ll @@ -16,8 +16,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) { define <2 x i1> @set_low_bit_mask_ne(<2 x i8> %x) { ; CHECK-LABEL: @set_low_bit_mask_ne( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -4) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = or <2 x i8> %x, @@ -220,7 +220,7 @@ define i1 @eq_const_mask_use2(i8 %x, i8 %y) { define <2 x i1> @decrement_slt_0(<2 x i8> %x) { ; CHECK-LABEL: @decrement_slt_0( -; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll index a151fd96ef49811..edbaffa6a12b9b1 100644 --- a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll @@ -373,7 +373,7 @@ define i1 @icmp_power2_and_icmp_shifted_mask_swapped_8_12_mask_overlap_fail(i32 ; Vector of 1 reduction define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647( -; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], +; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: ret <1 x i1> [[T4]] ; %t1 = icmp ult <1 x i32> %x, @@ -385,7 +385,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647( define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647( -; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], +; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: ret <1 x i1> [[T4]] ; %t1 = icmp ult <1 x i32> %x, @@ -529,9 +529,9 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_64_others(<6 x ; Vector of 0 of 1 compatible, no change define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], -; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647) +; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647) ; CHECK-NEXT: [[T4:%.*]] = and <1 x i1> [[T1]], [[T3]] ; CHECK-NEXT: ret <1 x i1> [[T4]] ; @@ -544,9 +544,9 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], -; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647) +; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647) ; CHECK-NEXT: [[T4:%.*]] = and <1 x i1> [[T3]], [[T1]] ; CHECK-NEXT: ret <1 x i1> [[T4]] ; @@ -560,7 +560,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_214 ; Vector 1 of 2 compatible, no change define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]] @@ -575,7 +575,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T3]], [[T1]] @@ -591,7 +591,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_107 ; Vector 1 of 7 compatible, no change define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <7 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <7 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <7 x i1> [[T1]], [[T3]] @@ -606,7 +606,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail(<7 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <7 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <7 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <7 x i1> [[T3]], [[T1]] @@ -622,7 +622,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fai ; Vector 0 of 6 compatible, no change define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <6 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <6 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <6 x i1> [[T1]], [[T3]] @@ -637,7 +637,7 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail(<6 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <6 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <6 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <6 x i1> [[T3]], [[T1]] diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 2db5bad17b1977f..97ed552b9a6da25 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -238,7 +238,7 @@ define <2 x i1> @test_two_ranges_vec_false(ptr nocapture readonly %arg1, ptr noc ; Values' ranges do not overlap each other, so it can simplified to true. define <2 x i1> @test_two_ranges_vec_true(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) { ; CHECK-LABEL: @test_two_ranges_vec_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %val1 = load <2 x i32>, ptr %arg1, !range !0 %val2 = load <2 x i32>, ptr %arg2, !range !6 @@ -268,7 +268,7 @@ define <2 x i1> @test_two_argument_ranges_vec_false(<2 x i32> range(i32 1, 6) %a ; Values' ranges do not overlap each other, so it can simplified to true. define <2 x i1> @test_two_argument_ranges_vec_true(<2 x i32> range(i32 1, 6) %arg1, <2 x i32> range(i32 8, 16) %arg2) { ; CHECK-LABEL: @test_two_argument_ranges_vec_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %rval = icmp ugt <2 x i32> %arg2, %arg1 ret <2 x i1> %rval @@ -545,7 +545,7 @@ define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) { define <2 x i1> @sub_ult_and(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_ult_and( -; CHECK-NEXT: [[A:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 1) ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -775,7 +775,7 @@ define i1 @sub_ult_sext(i1 %b, i8 %x, i8 %y) { define <2 x i1> @sub_ule_ashr(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_ule_ashr( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[S]], [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -1017,7 +1017,7 @@ define i1 @zext_sext_add_icmp_ugt_1(i1 %a, i1 %b) { define <2 x i1> @vector_zext_sext_add_icmp_slt_1(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @vector_zext_sext_add_icmp_slt_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1335,7 +1335,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp eq <2 x i32> %a, %conv = zext <2 x i1> %cmp to <2 x i32> @@ -1345,7 +1345,7 @@ define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1356,7 +1356,7 @@ define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1367,7 +1367,7 @@ define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ne <2 x i32> %a, %conv = zext <2 x i1> %cmp to <2 x i32> @@ -1520,7 +1520,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1530,8 +1530,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1542,8 +1542,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1554,7 +1554,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/icmp-rotate.ll b/llvm/test/Transforms/InstCombine/icmp-rotate.ll index eeaa1c786109767..c2d03b1786535c9 100644 --- a/llvm/test/Transforms/InstCombine/icmp-rotate.ll +++ b/llvm/test/Transforms/InstCombine/icmp-rotate.ll @@ -161,7 +161,7 @@ define i1 @ror_ne_cst(i8 %x) { define <2 x i1> @rol_eq_cst_vec(<2 x i5> %x) { ; CHECK-LABEL: @rol_eq_cst_vec( -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[X:%.*]], splat (i5 8) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %f = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> %x, <2 x i5> %x, <2 x i5> ) @@ -171,7 +171,7 @@ define <2 x i1> @rol_eq_cst_vec(<2 x i5> %x) { define <2 x i1> @rol_eq_cst_undef(<2 x i5> %x) { ; CHECK-LABEL: @rol_eq_cst_undef( -; CHECK-NEXT: [[F:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> ) +; CHECK-NEXT: [[F:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> splat (i5 3)) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[F]], ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-select.ll b/llvm/test/Transforms/InstCombine/icmp-select.ll index fb68c6ee942075c..0bdbc88ba67c678 100644 --- a/llvm/test/Transforms/InstCombine/icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/icmp-select.ll @@ -485,7 +485,7 @@ define i1 @select_constants_and_icmp_ne0_all_uses(i1 %x, i1 %y) { define <2 x i1> @select_constants_and_icmp_ne0_vec_splat(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @select_constants_and_icmp_ne0_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %s1 = select <2 x i1> %x, <2 x i9> , <2 x i9> diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll index 0803c03efdbf498..936bcfda1fc9a0e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll @@ -16,7 +16,7 @@ define i1 @icmp_shl_ugt_1(i8 %x) { define <2 x i1> @icmp_shl_ugt_2(<2 x i32> %_x) { ; CHECK-LABEL: @icmp_shl_ugt_2( -; CHECK-NEXT: [[X:%.*]] = add <2 x i32> [[_X:%.*]], +; CHECK-NEXT: [[X:%.*]] = add <2 x i32> [[_X:%.*]], splat (i32 42) ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -28,7 +28,7 @@ define <2 x i1> @icmp_shl_ugt_2(<2 x i32> %_x) { define <3 x i1> @icmp_shl_uge_1(<3 x i7> %x) { ; CHECK-LABEL: @icmp_shl_uge_1( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i7> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i7> [[X:%.*]], splat (i7 -1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %add = shl <3 x i7> %x, @@ -63,8 +63,8 @@ define i1 @icmp_shl_ult_1(i16 %x) { define <4 x i1> @icmp_shl_ult_2(<4 x i4> %_x) { ; CHECK-LABEL: @icmp_shl_ult_2( -; CHECK-NEXT: [[X:%.*]] = add <4 x i4> [[_X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = shl <4 x i4> [[X]], +; CHECK-NEXT: [[X:%.*]] = add <4 x i4> [[_X:%.*]], splat (i4 -6) +; CHECK-NEXT: [[ADD:%.*]] = shl <4 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: call void @usev4(<4 x i4> [[ADD]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i4> [[X]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] @@ -78,7 +78,7 @@ define <4 x i1> @icmp_shl_ult_2(<4 x i4> %_x) { define <2 x i1> @icmp_shl_ule_1(<2 x i8> %x) { ; CHECK-LABEL: @icmp_shl_ule_1( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = shl <2 x i8> %x, @@ -110,7 +110,7 @@ define i1 @icmp_shl_eq_1(i8 %x) { define <2 x i1> @icmp_shl_eq_2(<2 x i8> %_x) { ; CHECK-LABEL: @icmp_shl_eq_2( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[_X:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[_X:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll index 6b9ea1f8ef97e22..06175f1635a90d7 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll @@ -194,7 +194,7 @@ define i1 @icmp_sgt11(i8 %x) { define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) { ; CHECK-LABEL: @icmp_sgt11_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl nsw <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll index 9f50265004f01be..abd2021fae116fa 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll @@ -43,7 +43,7 @@ define <2 x i1> @icmp_ule_16x2(<2 x i64>) { define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) { ; CHECK-LABEL: @icmp_ule_16x2_nonzero( -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i64> %0, @@ -53,7 +53,7 @@ define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) { define <2 x i1> @icmp_ule_12x2(<2 x i64>) { ; CHECK-LABEL: @icmp_ule_12x2( -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i64> %0, @@ -73,7 +73,7 @@ define i1 @icmp_ult_8(i64) { define <2 x i1> @icmp_uge_8x2(<2 x i16>) { ; CHECK-LABEL: @icmp_uge_8x2( -; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i16> %0, @@ -83,7 +83,7 @@ define <2 x i1> @icmp_uge_8x2(<2 x i16>) { define <2 x i1> @icmp_ugt_16x2(<2 x i32>) { ; CHECK-LABEL: @icmp_ugt_16x2( -; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 15) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/icmp-shl.ll b/llvm/test/Transforms/InstCombine/icmp-shl.ll index 5295bd01aa0899f..2b8ec33bcf34dca 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl.ll @@ -67,7 +67,7 @@ define <2 x i1> @shl_vec_nsw_slt_1_0_todo_non_splat(<2 x i8> %x, <2 x i8> %C) { define <2 x i1> @shl_nsw_sle_n1(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_sle_n1( ; CHECK-NEXT: [[Y:%.*]] = shl nsw <2 x i8> [[X:%.*]], [[C:%.*]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw <2 x i8> %x, %C @@ -150,7 +150,7 @@ define i1 @shl_nuw_ugt_Csle0_fail_missing_flag(i8 %x, i8 %C) { define <2 x i1> @shl_nsw_nuw_sgt_Csle0(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_sgt_Csle0( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C @@ -171,7 +171,7 @@ define <2 x i1> @shl_nsw_nuw_sge_Csle0_todo_non_splat(<2 x i8> %x, <2 x i8> %C) define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_sle_Csle0( -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C @@ -182,7 +182,7 @@ define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) { define <2 x i1> @shl_nsw_nuw_slt_Csle0_fail_positive(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_slt_Csle0_fail_positive( ; CHECK-NEXT: [[Y:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], [[C:%.*]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 6) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll index 5f09964fd93ad5f..70f25391847614a 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll @@ -2454,7 +2454,7 @@ define i1 @ashr_00_00_ashr_extra_use(i8 %x, ptr %ptr) { define <4 x i1> @ashr_00_00_vec(<4 x i8> %x) { ; CHECK-LABEL: @ashr_00_00_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 88) ; CHECK-NEXT: ret <4 x i1> [[C]] ; %s = ashr exact <4 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index bdcba9ed1549b96..8aceba04e0aeb4a 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -17,7 +17,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) { define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 6) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> , %a @@ -1358,7 +1358,7 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) { define <2 x i1> @same_signbit(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = lshr <2 x i8> %x, @@ -1519,7 +1519,7 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) { define <2 x i1> @same_signbit_signed(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit_signed( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = ashr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-signmask.ll b/llvm/test/Transforms/InstCombine/icmp-signmask.ll index 5424f7d7e8021fc..4127d4071b4f113 100644 --- a/llvm/test/Transforms/InstCombine/icmp-signmask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-signmask.ll @@ -45,7 +45,7 @@ define <2 x i1> @cmp_x_and_negp2_with_ne_or_z(<2 x i8> %x) { define <2 x i1> @cmp_x_and_negp2_with_ne_fail_not_p2(<2 x i8> %x) { ; CHECK-LABEL: @cmp_x_and_negp2_with_ne_fail_not_p2( ; CHECK-NEXT: [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %andx = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll index 8cb3c1c181cec71..4143902bc9c46b1 100644 --- a/llvm/test/Transforms/InstCombine/icmp-sub.ll +++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll @@ -164,7 +164,7 @@ define <2 x i1> @icmp_eq_sub_non_splat(<2 x i32> %a) { define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_sub_undef2( -; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> , [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -175,7 +175,7 @@ define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) { define <2 x i1> @icmp_eq_sub_non_splat2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_sub_non_splat2( -; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> , [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -312,8 +312,8 @@ define i1 @neg_slt_42(i128 %x) { define <2 x i1> @neg_ugt_42_splat(<2 x i7> %x) { ; CHECK-LABEL: @neg_ugt_42_splat( -; CHECK-NEXT: [[NOTSUB:%.*]] = add <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i7> [[NOTSUB]], +; CHECK-NEXT: [[NOTSUB:%.*]] = add <2 x i7> [[X:%.*]], splat (i7 -1) +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i7> [[NOTSUB]], splat (i7 -43) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %negx = sub <2 x i7> zeroinitializer, %x @@ -606,7 +606,7 @@ entry: define <2 x i1> @PR60818_ne_vector(<2 x i32> %a) { ; CHECK-LABEL: @PR60818_ne_vector( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll index 4de64d85bbf25a0..b85deabf5fa060d 100644 --- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll +++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll @@ -17,7 +17,7 @@ define i1 @ult_2(i32 %x) { define <2 x i1> @ult_16_splat(<2 x i16> %x) { ; CHECK-LABEL: @ult_16_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2032) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -83,8 +83,8 @@ define i1 @ult_192(i32 %x) { define <2 x i1> @ult_2044_splat(<2 x i16> %x) { ; CHECK-LABEL: @ult_2044_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044) +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], splat (i16 2044) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i16> %x to <2 x i11> @@ -133,7 +133,7 @@ define i1 @ugt_3(i32 %x) { define <2 x i1> @ugt_7_splat(<2 x i16> %x) { ; CHECK-LABEL: @ugt_7_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2040) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -183,8 +183,8 @@ define i1 @ugt_253(i32 %x) { define <2 x i1> @ugt_2043_splat(<2 x i16> %x) { ; CHECK-LABEL: @ugt_2043_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044) +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], splat (i16 2044) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i16> %x to <2 x i11> @@ -233,7 +233,7 @@ define i1 @slt_0(i32 %x) { define <2 x i1> @slt_0_splat(<2 x i16> %x) { ; CHECK-LABEL: @slt_0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -279,7 +279,7 @@ define i1 @sgt_n1(i32 %x) { define <2 x i1> @sgt_n1_splat(<2 x i16> %x) { ; CHECK-LABEL: @sgt_n1_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -331,7 +331,7 @@ define i1 @trunc_eq_i32_i8(i32 %x) { define <2 x i1> @trunc_eq_v2i32_v2i8(<2 x i32> %x) { ; CHECK-LABEL: @trunc_eq_v2i32_v2i8( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T]], splat (i8 42) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i32> %x to <2 x i8> @@ -368,7 +368,7 @@ define i1 @shl1_trunc_eq0(i32 %a) { define <2 x i1> @shl1_trunc_ne0(<2 x i8> %a) { ; CHECK-LABEL: @shl1_trunc_ne0( -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %shl = shl <2 x i8> , %a @@ -765,7 +765,7 @@ define i1 @sge_nsw_i48(i48 %x) { define <2 x i1> @uge_nsw(<2 x i32> %x) { ; CHECK-LABEL: @uge_nsw( ; CHECK-NEXT: [[T:%.*]] = trunc nsw <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[T]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[T]], splat (i8 -46) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc nsw <2 x i32> %x to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll index a61feac024a9c32..fc582708e0b750f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll @@ -8,8 +8,8 @@ ; Basic tests with one user ; ============================================================================== define i1 @icmp_eq_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_eq_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_eq_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ARG]], 3 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -19,8 +19,8 @@ define i1 @icmp_eq_basic(i8 %arg) { } define i1 @icmp_ne_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_ne_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ne_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[ARG]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -30,8 +30,8 @@ define i1 @icmp_ne_basic(i16 %arg) { } define i1 @icmp_ule_basic(i32 %arg) { -; CHECK-LABEL: define i1 @icmp_ule_basic -; CHECK-SAME: (i32 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ule_basic( +; CHECK-SAME: i32 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ARG]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -41,8 +41,8 @@ define i1 @icmp_ule_basic(i32 %arg) { } define i1 @icmp_ult_basic(i64 %arg) { -; CHECK-LABEL: define i1 @icmp_ult_basic -; CHECK-SAME: (i64 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ult_basic( +; CHECK-SAME: i64 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[ARG]], 15 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -52,8 +52,8 @@ define i1 @icmp_ult_basic(i64 %arg) { } define i1 @icmp_uge_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_uge_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_uge_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ARG]], 3 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -63,8 +63,8 @@ define i1 @icmp_uge_basic(i8 %arg) { } define i1 @icmp_ugt_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_ugt_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ugt_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ARG]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -74,8 +74,8 @@ define i1 @icmp_ugt_basic(i16 %arg) { } define i1 @icmp_sle_basic(i32 %arg) { -; CHECK-LABEL: define i1 @icmp_sle_basic -; CHECK-SAME: (i32 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sle_basic( +; CHECK-SAME: i32 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ARG]], 2147483637 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -85,8 +85,8 @@ define i1 @icmp_sle_basic(i32 %arg) { } define i1 @icmp_slt_basic(i64 %arg) { -; CHECK-LABEL: define i1 @icmp_slt_basic -; CHECK-SAME: (i64 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_slt_basic( +; CHECK-SAME: i64 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[ARG]], 9223372036854775783 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -96,8 +96,8 @@ define i1 @icmp_slt_basic(i64 %arg) { } define i1 @icmp_sge_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_sge_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sge_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[ARG]], -3 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 124 ; CHECK-NEXT: ret i1 [[CMP]] @@ -108,8 +108,8 @@ define i1 @icmp_sge_basic(i8 %arg) { } define i1 @icmp_sgt_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_sgt_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sgt_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ARG]], -4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[TMP1]], 32762 ; CHECK-NEXT: ret i1 [[CMP]] @@ -123,8 +123,8 @@ define i1 @icmp_sgt_basic(i16 %arg) { ; Tests with more than user ; ============================================================================== define i1 @icmp_eq_multiuse(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_eq_multiuse -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_eq_multiuse( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[ARG]], i8 2) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ADD]], 5 ; CHECK-NEXT: call void @use.i8(i8 [[ADD]]) @@ -140,9 +140,9 @@ define i1 @icmp_eq_multiuse(i8 %arg) { ; Tests with vector types ; ============================================================================== define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -151,8 +151,8 @@ define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) { } define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -163,9 +163,9 @@ define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) { } define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal -; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal( +; CHECK-SAME: <2 x i16> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %arg, <2 x i16> ) @@ -174,8 +174,8 @@ define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) { } define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal -; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal( +; CHECK-SAME: <2 x i16> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG]], <2 x i16> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -186,9 +186,9 @@ define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) { } define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal -; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal( +; CHECK-SAME: <2 x i32> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %arg, <2 x i32> ) @@ -197,8 +197,8 @@ define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) { } define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal -; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal( +; CHECK-SAME: <2 x i32> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[ARG]], <2 x i32> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -209,9 +209,9 @@ define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) { } define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal -; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], splat (i64 9223372036854366185) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %arg, <2 x i64> ) @@ -220,8 +220,8 @@ define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) { } define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal -; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[ARG]], <2 x i64> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i64> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -235,10 +235,10 @@ define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) { ; Tests with vector types and multiple uses ; ============================================================================== define <2 x i1> @icmp_eq_vector_multiuse_equal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[ADD]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index 32ef6267cdf8b4e..347d35548164b21 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_poison1( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index c185e632b5194f1..568faea6fc9ab1c 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) { define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[VAL:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index 25894a22f007514..34fc0002eee1686 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_poison1( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index c1912e11b93abfe..d114a979d779d4d 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) { define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[VAL:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll index 87257e40ac6a2cf..2cd07b17af58062 100644 --- a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll +++ b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll @@ -275,7 +275,7 @@ define i1 @icmp_eq_multiuse_negative(i8 %arg) { define <2 x i1> @icmp_eq_vector_positive_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_positive_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 7) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -298,7 +298,7 @@ define <2 x i1> @icmp_eq_vector_positive_unequal(<2 x i8> %arg) { define <2 x i1> @icmp_ne_vector_positive_equal(<2 x i16> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_positive_equal ; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 37) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %arg, <2 x i16> ) @@ -321,7 +321,7 @@ define <2 x i1> @icmp_ne_vector_positive_unequal(<2 x i16> %arg) { define <2 x i1> @icmp_ule_vector_positive_equal(<2 x i32> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_positive_equal ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 37) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %arg, <2 x i32> ) @@ -344,8 +344,8 @@ define <2 x i1> @icmp_ule_vector_positive_unequal(<2 x i32> %arg) { define <2 x i1> @icmp_sgt_vector_positive_equal(<2 x i64> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_positive_equal ; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[ARG]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[ARG]], splat (i64 -410858) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 9223372036854774573) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %arg, <2 x i64> ) @@ -371,7 +371,7 @@ define <2 x i1> @icmp_sgt_vector_positive_unequal(<2 x i64> %arg) { define <2 x i1> @icmp_eq_vector_negative_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_negative_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 -3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -397,8 +397,8 @@ define <2 x i1> @icmp_eq_vector_negative_unequal(<2 x i8> %arg) { define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_positive_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[SUB]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -414,8 +414,8 @@ define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) { define <2 x i1> @icmp_eq_vector_multiuse_negative_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_negative_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 -20)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[SUB]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll index 677655a0a99ab65..3678946e631a24b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll @@ -44,7 +44,7 @@ define <2 x i1> @ule(<2 x i8> %x) { define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { ; CHECK-LABEL: @ult_min_signed_value( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp ult <2 x i8> %x, @@ -55,7 +55,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { define <2 x i1> @sge_zero(<2 x i8> %x) { ; CHECK-LABEL: @sge_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sge <2 x i8> %x, @@ -64,7 +64,7 @@ define <2 x i1> @sge_zero(<2 x i8> %x) { define <2 x i1> @uge_zero(<2 x i8> %x) { ; CHECK-LABEL: @uge_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp uge <2 x i8> %x, ret <2 x i1> %cmp @@ -72,7 +72,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) { define <2 x i1> @sle_zero(<2 x i8> %x) { ; CHECK-LABEL: @sle_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sle <2 x i8> %x, @@ -291,7 +291,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { define <4 x i1> @splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -302,7 +302,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) { define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -313,7 +313,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { ; CHECK-LABEL: @splat_icmp_larger_size( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -324,7 +324,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { ; CHECK-LABEL: @splat_fcmp_smaller_size( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -339,7 +339,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_extra_use( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -353,7 +353,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { define <4 x i1> @not_splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @not_splat_icmp( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll index 1e392635936b7e1..333b7519c807127 100644 --- a/llvm/test/Transforms/InstCombine/icmp-vec.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll @@ -46,7 +46,7 @@ define <2 x i1> @ule(<2 x i8> %x) { define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { ; CHECK-LABEL: @ult_min_signed_value( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp ult <2 x i8> %x, @@ -57,7 +57,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { define <2 x i1> @sge_zero(<2 x i8> %x) { ; CHECK-LABEL: @sge_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sge <2 x i8> %x, @@ -66,7 +66,7 @@ define <2 x i1> @sge_zero(<2 x i8> %x) { define <2 x i1> @uge_zero(<2 x i8> %x) { ; CHECK-LABEL: @uge_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp uge <2 x i8> %x, ret <2 x i1> %cmp @@ -74,7 +74,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) { define <2 x i1> @sle_zero(<2 x i8> %x) { ; CHECK-LABEL: @sle_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sle <2 x i8> %x, @@ -293,7 +293,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { define <4 x i1> @splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -304,7 +304,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) { define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -315,7 +315,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { ; CHECK-LABEL: @splat_icmp_larger_size( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -326,7 +326,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { ; CHECK-LABEL: @splat_fcmp_smaller_size( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -341,7 +341,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_extra_use( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -355,7 +355,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { define <4 x i1> @not_splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @not_splat_icmp( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -381,7 +381,7 @@ define <4 x i1> @not_splat_icmp2(<4 x i8> %x) { define <2 x i1> @icmp_logical_or_vec(<2 x i64> %x, <2 x i64> %y, <2 x i1> %falseval) { ; CHECK-LABEL: @icmp_logical_or_vec( ; CHECK-NEXT: [[CMP_NE:%.*]] = icmp ne <2 x i64> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP_NE]], <2 x i1> , <2 x i1> [[FALSEVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP_NE]], <2 x i1> splat (i1 true), <2 x i1> [[FALSEVAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[SEL]] ; %cmp.ne = icmp ne <2 x i64> %x, zeroinitializer @@ -520,7 +520,7 @@ define i1 @not_cast_ne-1(<3 x i1> %x) { define i1 @not_cast_ne-1_uses(<3 x i2> %x, ptr %p) { ; CHECK-LABEL: @not_cast_ne-1_uses( -; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i2> [[X:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i2> [[X:%.*]], splat (i2 -1) ; CHECK-NEXT: store <3 x i2> [[NOT]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i2> [[X]] to i6 ; CHECK-NEXT: [[R:%.*]] = icmp ne i6 [[TMP1]], 0 diff --git a/llvm/test/Transforms/InstCombine/icmp-with-selects.ll b/llvm/test/Transforms/InstCombine/icmp-with-selects.ll index 9ee7c78379f5c24..8bf75d0052fec2e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-with-selects.ll +++ b/llvm/test/Transforms/InstCombine/icmp-with-selects.ll @@ -150,7 +150,7 @@ define <4 x i1> @fold_vector_ops(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %pa ; CHECK-SAME: (<4 x i32> [[VAL1:%.*]], <4 x i32> [[VAL2:%.*]], <4 x i32> [[PARAM:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VAL2]], [[VAL1]] -; CHECK-NEXT: [[CMP:%.*]] = select i1 [[COND]], <4 x i1> [[TMP0]], <4 x i1> +; CHECK-NEXT: [[CMP:%.*]] = select i1 [[COND]], <4 x i1> [[TMP0]], <4 x i1> splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll index d08dca225328fef..dd07a7d452a777c 100644 --- a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll +++ b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll @@ -113,7 +113,7 @@ define i1 @sge_to_ugt(i8 %x) { define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) { ; CHECK-LABEL: @sge_to_ugt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 -114) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -135,7 +135,7 @@ define i1 @uge_to_sgt(i8 %x) { define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) { ; CHECK-LABEL: @uge_to_sgt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -114) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -157,7 +157,7 @@ define i1 @sge_to_ult(i8 %x) { define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) { ; CHECK-LABEL: @sge_to_ult_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -179,7 +179,7 @@ define i1 @uge_to_slt(i8 %x) { define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) { ; CHECK-LABEL: @uge_to_slt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -268,7 +268,7 @@ define i1 @slt_zero_ne_ne_0(i32 %a) { define <4 x i1> @slt_zero_eq_ne_0_vec(<4 x i32> %a) { ; CHECK-LABEL: @slt_zero_eq_ne_0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i1> [[TMP1]] ; %cmp = icmp ne <4 x i32> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index d52a0b769793737..c1b9752607c3d4e 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -22,7 +22,7 @@ define i32 @test1(i32 %X) { define <2 x i32> @test1vec(<2 x i32> %X) { ; CHECK-LABEL: @test1vec( -; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[X_LOBIT]] ; %a = icmp slt <2 x i32> %X, zeroinitializer @@ -43,7 +43,7 @@ define i32 @test2(i32 %X) { define <2 x i32> @test2vec(<2 x i32> %X) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[A]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[B]] ; @@ -167,7 +167,7 @@ define i1 @test9(i32 %x) { define <2 x i1> @test9_vec(<2 x i32> %x) { ; CHECK-LABEL: @test9_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -187,7 +187,7 @@ define i1 @test9b(i32 %x) { define <2 x i1> @test9b_vec(<2 x i32> %x) { ; CHECK-LABEL: @test9b_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -207,7 +207,7 @@ define i1 @test10(i32 %x) { define <2 x i1> @test10_vec(<2 x i32> %x) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -227,7 +227,7 @@ define i1 @test10b(i32 %x) { define <2 x i1> @test10b_vec(<2 x i32> %x) { ; CHECK-LABEL: @test10b_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -246,7 +246,7 @@ define i1 @test11(i32 %x) { define <2 x i1> @test11_vec(<2 x i32> %x) { ; CHECK-LABEL: @test11_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = add nsw <2 x i32> %x, %b = icmp slt <2 x i32> %x, %a @@ -310,7 +310,7 @@ define i1 @test17(i32 %x) { define <2 x i1> @test17vec(<2 x i32> %x) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -332,7 +332,7 @@ define i1 @test17a(i32 %x) { define <2 x i1> @test17a_vec(<2 x i32> %x) { ; CHECK-LABEL: @test17a_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %x @@ -354,7 +354,7 @@ define i1 @test18_eq(i32 %x) { define <2 x i1> @test18_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @test18_eq_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %sh = lshr <2 x i32> , %x @@ -376,7 +376,7 @@ define i1 @test18_ne(i32 %x) { define <2 x i1> @test18_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @test18_ne_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %sh = lshr <2 x i32> , %x @@ -398,7 +398,7 @@ define i1 @test19(i32 %x) { define <2 x i1> @test19vec(<2 x i32> %x) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -430,7 +430,7 @@ define i1 @test20(i32 %x) { define <2 x i1> @test20vec(<2 x i32> %x) { ; CHECK-LABEL: @test20vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -452,7 +452,7 @@ define i1 @test20a(i32 %x) { define <2 x i1> @test20a_vec(<2 x i32> %x) { ; CHECK-LABEL: @test20a_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %x @@ -494,7 +494,7 @@ define i1 @test23(i32 %x) { define <2 x i1> @test23vec(<2 x i32> %x) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 1328634634) ; CHECK-NEXT: ret <2 x i1> [[I4]] ; %i3 = sdiv <2 x i32> %x, @@ -745,7 +745,7 @@ define i1 @shr_exact(i132 %x) { define <2 x i1> @shr_exact_vec(<2 x i132> %x) { ; CHECK-LABEL: @shr_exact_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> [[X:%.*]], splat (i132 32) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sh = lshr exact <2 x i132> %x, @@ -1004,7 +1004,7 @@ define i1 @test_sdiv_neg_slt(i32 %x, i32 %y) { define <2 x i1> @test49(<2 x i32> %i3) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %i11 = and <2 x i32> %i3, @@ -1137,7 +1137,7 @@ define i1 @test55(i32 %a) { define <2 x i1> @test55vec(<2 x i32> %a) { ; CHECK-LABEL: @test55vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -123) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = sub <2 x i32> zeroinitializer, %a @@ -1157,7 +1157,7 @@ define i1 @test56(i32 %a) { define <2 x i1> @test56vec(<2 x i32> %a) { ; CHECK-LABEL: @test56vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = sub <2 x i32> , %a @@ -1422,7 +1422,7 @@ define i1 @test67inverse(i32 %x) { define <2 x i1> @test67vec(<2 x i32> %x) { ; CHECK-LABEL: @test67vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1433,7 +1433,7 @@ define <2 x i1> @test67vec(<2 x i32> %x) { define <2 x i1> @test67vec2(<2 x i32> %x) { ; CHECK-LABEL: @test67vec2( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1444,7 +1444,7 @@ define <2 x i1> @test67vec2(<2 x i32> %x) { define <2 x i1> @test67vecinverse(<2 x i32> %x) { ; CHECK-LABEL: @test67vecinverse( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1479,7 +1479,7 @@ define i1 @test70(i32 %X) { define <2 x i1> @test70vec(<2 x i32> %X) { ; CHECK-LABEL: @test70vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = add <2 x i32> %X, @@ -1515,7 +1515,7 @@ define i1 @icmp_sext8trunc(i32 %x) { define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_sext8trunc_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 36) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %trunc = trunc <2 x i32> %x to <2 x i8> @@ -1551,7 +1551,7 @@ define i1 @icmp_shl17(i32 %x) { define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl16_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], splat (i16 36) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> %x, @@ -1583,7 +1583,7 @@ define i1 @icmp_shl_eq(i32 %x) { define <2 x i1> @icmp_shl_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_eq_vec( -; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[MUL_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1625,7 +1625,7 @@ define i1 @icmp_shl_ne(i32 %x) { define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_ne_vec( -; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 33554431) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1636,7 +1636,7 @@ define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) { define <2 x i1> @icmp_shl_nuw_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_nuw_ne_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl nuw <2 x i32> %x, @@ -1779,7 +1779,7 @@ define i1 @icmp_add20_eq_add57(i32 %x, i32 %y) { define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_eq_add57_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1791,7 +1791,7 @@ define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_add20_eq_add57_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_eq_add57_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1827,7 +1827,7 @@ define i1 @icmp_sub57_ne_sub20(i32 %x, i32 %y) { define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1839,7 +1839,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1851,7 +1851,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_ne_sub20_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1915,7 +1915,7 @@ define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) { define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_sge_add57_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1927,7 +1927,7 @@ define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_add20_sge_add57_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_sge_add57_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1964,7 +1964,7 @@ define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) { define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_sge_sub20_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1976,7 +1976,7 @@ define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_sge_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_sge_sub20_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2085,8 +2085,8 @@ define i1 @icmp_add_and_shr_ne_0(i32 %X) { define <2 x i1> @icmp_add_and_shr_ne_0_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_and_shr_ne_0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 240) +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 224) ; CHECK-NEXT: ret <2 x i1> [[TOBOOL]] ; %shr = lshr <2 x i32> %X, @@ -2291,7 +2291,7 @@ define i1 @icmp_shl_1_V_ult_32(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_32_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_32_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2311,7 +2311,7 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) { define <2 x i1> @icmp_shl_1_V_eq_32_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_eq_32_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2331,7 +2331,7 @@ define i1 @icmp_shl_1_V_ult_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2351,7 +2351,7 @@ define i1 @icmp_shl_1_V_ugt_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ugt_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ugt_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2371,7 +2371,7 @@ define i1 @icmp_shl_1_V_ule_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ule_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ule_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2391,7 +2391,7 @@ define i1 @icmp_shl_1_V_uge_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_uge_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_uge_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2411,7 +2411,7 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_V_uge_2147483648_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2431,7 +2431,7 @@ define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_2147483648_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2451,7 +2451,7 @@ define i1 @icmp_shl_1_V_sle_0(i32 %V) { define <2 x i1> @icmp_shl_1_V_sle_0_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sle_0_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2471,7 +2471,7 @@ define i1 @icmp_shl_1_V_sle_negative(i32 %V) { define <2 x i1> @icmp_shl_1_V_sle_0_negative(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sle_0_negative( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2491,7 +2491,7 @@ define i1 @icmp_shl_1_V_sgt_0(i32 %V) { define <2 x i1> @icmp_shl_1_V_sgt_0_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sgt_0_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2511,7 +2511,7 @@ define i1 @icmp_shl_1_V_sgt_negative(i32 %V) { define <2 x i1> @icmp_shl_1_V_sgt_negative_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sgt_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2546,7 +2546,7 @@ define i1 @or_icmp_eq_B_0_icmp_ult_A_B_logical(i64 %a, i64 %b) { define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2558,7 +2558,7 @@ define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_poison(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2792,7 +2792,7 @@ define i1 @and_icmp_ne_B_0_icmp_uge_A_B_logical(i64 %a, i64 %b) { define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2804,7 +2804,7 @@ define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_poison(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2827,8 +2827,8 @@ define i1 @icmp_add_ult_2(i32 %X) { define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_X_-14_ult_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 14) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add <2 x i32> %X, @@ -2849,8 +2849,8 @@ define i1 @icmp_sub_3_X_ult_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X @@ -2871,8 +2871,8 @@ define i1 @icmp_add_X_-14_uge_2(i32 %X) { define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_X_-14_uge_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 14) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add <2 x i32> %X, @@ -2893,8 +2893,8 @@ define i1 @icmp_sub_3_X_uge_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X @@ -2914,7 +2914,7 @@ define i1 @icmp_and_X_-16_eq-16(i32 %X) { define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -17) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %and = and <2 x i32> %X, @@ -2934,7 +2934,7 @@ define i1 @icmp_and_X_-16_ne-16(i32 %X) { define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %and = and <2 x i32> %X, @@ -2959,7 +2959,7 @@ define i1 @or1_eq1(i32 %x) { define <2 x i1> @or3_eq3_vec(<2 x i8> %x) { ; CHECK-LABEL: @or3_eq3_vec( -; CHECK-NEXT: [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[T1]] ; %t0 = or <2 x i8> %x, @@ -2983,7 +2983,7 @@ define i1 @or7_ne7(i32 %x) { define <2 x i1> @or63_ne63_vec(<2 x i8> %x) { ; CHECK-LABEL: @or63_ne63_vec( -; CHECK-NEXT: [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 63) ; CHECK-NEXT: ret <2 x i1> [[T1]] ; %t0 = or <2 x i8> %x, @@ -3009,7 +3009,7 @@ define i1 @orC_eqC(i32 %x) { define <2 x i1> @orC_eqC_vec(<2 x i8> %x) { ; CHECK-LABEL: @orC_eqC_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -44) ; CHECK-NEXT: [[T1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T1]] ; @@ -3035,7 +3035,7 @@ define i1 @orC_neC(i32 %x) { define <2 x i1> @orC_neC_vec(<2 x i8> %x) { ; CHECK-LABEL: @orC_neC_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[T1:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T1]] ; @@ -3057,8 +3057,8 @@ define i1 @shrink_constant(i32 %X) { define <2 x i1> @shrink_constant_vec(<2 x i32> %X) { ; CHECK-LABEL: @shrink_constant_vec( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -12) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3079,7 +3079,7 @@ define i1 @icmp_sub_-1_X_ult_4(i32 %X) { define <2 x i1> @icmp_xor_neg4_X_ult_4_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_xor_neg4_X_ult_4_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3099,7 +3099,7 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) { define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_xor_neg4_X_uge_4_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3109,7 +3109,7 @@ define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) { define <2 x i1> @xor_ult(<2 x i8> %x) { ; CHECK-LABEL: @xor_ult( -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %xor = xor <2 x i8> %x, @@ -3132,7 +3132,7 @@ define i1 @xor_ult_extra_use(i8 %x, ptr %p) { define <2 x i1> @xor_ugt(<2 x i8> %x) { ; CHECK-LABEL: @xor_ugt( -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %xor = xor <2 x i8> %x, @@ -3233,7 +3233,7 @@ define <2 x i1> @icmp_and_or_lshr_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_and_or_lshr_vec_commute(<2 x i32> %xp, <2 x i32> %y) { ; CHECK-LABEL: @icmp_and_or_lshr_vec_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[SHF:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X]], [[SHF]] ; CHECK-NEXT: [[RET:%.*]] = trunc <2 x i32> [[OR]] to <2 x i1> @@ -3262,7 +3262,7 @@ define i1 @icmp_and_or_lshr_cst(i32 %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -3301,8 +3301,8 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_poison(<2 x i32> %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], splat (i32 3) ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -3316,7 +3316,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_nonuniform_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] @@ -3331,7 +3331,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_poison_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_poison_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] @@ -3356,7 +3356,7 @@ define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) { define <2 x i1> @shl_ap1_zero_ap2_non_zero_2_vec(<2 x i32> %a) { ; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 29) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %a @@ -3719,8 +3719,8 @@ define i1 @cmp_inverse_mask_bits_set_eq(i32 %x) { define <2 x i1> @cmp_inverse_mask_bits_set_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -43) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 -43) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %or = or <2 x i32> %x, @@ -3790,8 +3790,8 @@ define i1 @ugtKnownBits(i8 %a) { define <2 x i1> @ugtKnownBitsVec(<2 x i8> %a) { ; CHECK-LABEL: @ugtKnownBitsVec( -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 17) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[B]], splat (i8 17) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %b = and <2 x i8> %a, @@ -4069,9 +4069,9 @@ define i1 @knownbits3(i8 %a, i8 %b) { define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @knownbits4( -; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], +; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2) +; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -4119,9 +4119,9 @@ define i1 @knownbits6(i8 %a, i8 %b) { define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @knownbits7( -; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], +; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127) +; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2) +; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -4240,7 +4240,7 @@ define i1 @signbit_bitcast_fptrunc(float %x) { define <2 x i1> @signbit_bitcast_fptrunc_vec(<2 x double> %x) { ; CHECK-LABEL: @signbit_bitcast_fptrunc_vec( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %f = fptrunc <2 x double> %x to <2 x half> @@ -4266,7 +4266,7 @@ define <4 x i1> @signbit_bitcast_fpext_vec_wrong_bitcast(<2 x half> %x) { ; CHECK-LABEL: @signbit_bitcast_fpext_vec_wrong_bitcast( ; CHECK-NEXT: [[F:%.*]] = fpext <2 x half> [[X:%.*]] to <2 x float> ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x float> [[F]] to <4 x i16> -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i16> [[B]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i16> [[B]], splat (i16 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %f = fpext <2 x half> %x to <2 x float> @@ -4536,9 +4536,9 @@ define i1 @redundant_sign_bit_count_i8(i8 %x) { define <2 x i1> @redundant_sign_bit_count_ult_31_30_vector(<2 x i32> %xsrc) { ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_vector( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], splat (i32 13) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], splat (i32 1073741824) +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %x = mul <2 x i32> %xsrc, ; thwart complexity-based canonicalization @@ -4612,10 +4612,10 @@ define i1 @zext_bool_and_eq0(i1 %x, i8 %y) { define <2 x i1> @zext_bool_and_eq0_commute(<2 x i1> %x, <2 x i8> %p) { ; CHECK-LABEL: @zext_bool_and_eq0_commute( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 1) ; CHECK-NEXT: [[R1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_X]], <2 x i1> , <2 x i1> [[R1]] +; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_X]], <2 x i1> splat (i1 true), <2 x i1> [[R1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %y = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization @@ -4742,7 +4742,7 @@ define i1 @or_positive_sgt_zero(i8 %a) { define <2 x i1> @or_postive_sgt_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sgt_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4773,7 +4773,7 @@ define i1 @or_positive_sge_zero(i8 %a) { define <2 x i1> @or_postive_sge_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4804,7 +4804,7 @@ define i1 @or_positive_sge_postive(i8 %a) { define <2 x i1> @or_postive_sge_positive_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_positive_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4930,7 +4930,7 @@ define i1 @or_positive_sgt_neg(i8 %a) { define <2 x i1> @or_postive_sgt_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sgt_neg_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4952,8 +4952,8 @@ define i1 @mul_or_positive_sge_neg(i8 %a) { define <2 x i1> @or_postive_sge_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_neg_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4975,8 +4975,8 @@ define i1 @mul_or_small_sge_large(i8 %a) { define <2 x i1> @or_small_sge_large_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_small_sge_large_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 24) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5019,8 +5019,8 @@ define i1 @or_positive_slt_neg(i8 %a) { define <2 x i1> @or_postive_slt_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_slt_neg_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5042,8 +5042,8 @@ define i1 @or_small_slt_large(i8 %a) { define <2 x i1> @or_small_slt_large_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_small_slt_large_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 25) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5303,7 +5303,7 @@ define i1 @test_icmp_shl_nsw_i31(i31 %x) { define <2 x i1> @test_icmp_shl_vec(<2 x i64> %x) { ; CHECK-LABEL: @test_icmp_shl_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i64> %x, splat(i64 32) diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll index 6cbb2a246f5a4a5..8bc915e695aa75d 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll @@ -725,7 +725,7 @@ define <4 x float> @insert_demanded_element_op1(<4 x float> %x, <4 x float> %y) define <4 x float> @splat_constant(<4 x float> %x) { ; CHECK-LABEL: @splat_constant( ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3 -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <4 x float> [[R]] ; %ins3 = insertelement <4 x float> %x, float 3.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll index c87e2e8596c62dd..f51e444a815c80f 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -781,7 +781,7 @@ define <5 x float> @insert_demanded_element_unequal_length_op1(<4 x float> %x, < define <4 x float> @splat_constant(<4 x float> %x) { ; CHECK-LABEL: @splat_constant( ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3 -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <4 x float> [[R]] ; %ins3 = insertelement <4 x float> %x, float 3.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/insertelement.ll b/llvm/test/Transforms/InstCombine/insertelement.ll index c8df2db6e70caac..72dcd929ba0cb8a 100644 --- a/llvm/test/Transforms/InstCombine/insertelement.ll +++ b/llvm/test/Transforms/InstCombine/insertelement.ll @@ -15,7 +15,7 @@ define <4 x i32> @insert_known_idx(<4 x i32> %x) { define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) { ; CHECK-LABEL: @insert_unknown_idx( -; CHECK-NEXT: [[V1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[V1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 6, i32 [[IDX:%.*]] ; CHECK-NEXT: ret <4 x i32> [[V2]] ; @@ -27,7 +27,7 @@ define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) { define <2 x i8> @insert_known_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-LABEL: @insert_known_any_idx( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 16) ; %x = or <2 x i8> %xx, %y = or i8 %yy, 16 @@ -42,7 +42,7 @@ define <2 x i8> @insert_known_any_idx_fail1(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], ; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 16 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = or <2 x i8> %xx, @@ -59,7 +59,7 @@ define <2 x i8> @insert_known_any_idx_fail2(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], ; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 15 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = or <2 x i8> %xx, diff --git a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll index afd56abf40a5075..4d8993dfb331597 100644 --- a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll +++ b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll @@ -74,8 +74,8 @@ define i8 @t3_commutative(i8 %x) { ; Basic splat vector test define <2 x i8> @t4_splat(<2 x i8> %x) { ; CHECK-LABEL: @t4_splat( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -89,8 +89,8 @@ define <2 x i8> @t4_splat(<2 x i8> %x) { ; Splat-with-poison define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0001( -; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], +; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -102,8 +102,8 @@ define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0010( -; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], +; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -115,8 +115,8 @@ define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0100( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -128,8 +128,8 @@ define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b1000(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b1000( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -161,10 +161,10 @@ define <2 x i8> @t6_nonsplat(<2 x i8> %x) { ; Even if the alignment (and masks) are splat, the bias could be non-splat define <2 x i8> @t7_nonsplat_bias(<2 x i8> %x) { ; CHECK-LABEL: @t7_nonsplat_bias( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 15) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; @@ -441,9 +441,9 @@ define i8 @t17_oneuse(i8 %x) { ; more poisonous. define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0001( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], splat (i4 3) ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]] @@ -461,10 +461,10 @@ define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) { ; more poisonous. define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0010( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i4> [[X_ROUNDEDUP]] @@ -479,8 +479,8 @@ define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) { } define <2 x i4> @t18_replacement_0b0100(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0100( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; @@ -494,8 +494,8 @@ define <2 x i4> @t18_replacement_0b0100(<2 x i4> %x) { } define <2 x i4> @t18_replacement_0b1000(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b1000( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll index f110d7765830efa..4ce2908a6307858 100644 --- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll +++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll @@ -297,7 +297,7 @@ define double @test_fabs_select1(double %a) { define <2 x double> @test_fabs_select1_vec(<2 x double> %a) { ; CHECK-LABEL: @test_fabs_select1_vec( ; CHECK-NEXT: [[COND:%.*]] = fcmp uno <2 x double> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> , <2 x double> [[A]] +; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> splat (double 0x7FF8000000000000), <2 x double> [[A]] ; CHECK-NEXT: ret <2 x double> [[SEL2]] ; %cond = fcmp uno <2 x double> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll index c8d70e17cd39211..9dceb419215e556 100644 --- a/llvm/test/Transforms/InstCombine/intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/intrinsics.ll @@ -62,7 +62,7 @@ define i32 @cttz(i32 %a) { define <2 x i32> @cttz_vec(<2 x i32> %a) { ; CHECK-LABEL: @cttz_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 3) ; %or = or <2 x i32> %a, %and = and <2 x i32> %or, @@ -90,7 +90,7 @@ define i1 @cttz_i1_zero_is_poison(i1 %arg) { define <2 x i1> @cttz_v2i1(<2 x i1> %arg) { ; CHECK-LABEL: @cttz_v2i1( -; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CNT]] ; %cnt = call <2 x i1> @llvm.cttz.v2i1(<2 x i1> %arg, i1 false) nounwind readnone @@ -149,7 +149,7 @@ define i32 @cttz_knownbits2(i32 %arg) { define <2 x i32> @cttz_knownbits2_vec(<2 x i32> %arg) { ; CHECK-LABEL: @cttz_knownbits2_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], splat (i32 4) ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 0, 3) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) #[[ATTR2]] ; CHECK-NEXT: ret <2 x i32> [[CNT]] ; @@ -190,7 +190,7 @@ define i8 @ctlz(i8 %a) { define <2 x i8> @ctlz_vec(<2 x i8> %a) { ; CHECK-LABEL: @ctlz_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 2) ; %or = or <2 x i8> %a, %and = and <2 x i8> %or, @@ -218,7 +218,7 @@ define i1 @ctlz_i1_zero_is_poison(i1 %arg) { define <2 x i1> @ctlz_v2i1(<2 x i1> %arg) { ; CHECK-LABEL: @ctlz_v2i1( -; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CNT]] ; %cnt = call <2 x i1> @llvm.ctlz.v2i1(<2 x i1> %arg, i1 false) nounwind readnone @@ -266,7 +266,7 @@ define i8 @ctlz_knownbits2(i8 %arg) { define <2 x i8> @ctlz_knownbits2_vec(<2 x i8> %arg) { ; CHECK-LABEL: @ctlz_knownbits2_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], splat (i8 32) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 3) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) #[[ATTR2]] ; CHECK-NEXT: ret <2 x i8> [[CNT]] ; @@ -324,7 +324,7 @@ define i32 @ctlz_no_zero(i32 %a) { define <2 x i32> @ctlz_no_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_no_zero_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 29) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; @@ -362,7 +362,7 @@ define i32 @cttz_no_zero(i32 %a) { define <2 x i32> @cttz_no_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @cttz_no_zero_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8) ; CHECK-NEXT: [[CTTZ:%.*]] = tail call range(i32 0, 4) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTTZ]] ; diff --git a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll index 0440199dadb873b..c73b07cfbc66812 100644 --- a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll +++ b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll @@ -52,7 +52,7 @@ define <2 x i4> @in_constant_varx_mone_invmask(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_6_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -94,7 +94,7 @@ define <3 x i4> @in_constant_varx_6_invmask_poison(<3 x i4> %x, <3 x i4> %mask) define <2 x i4> @in_constant_mone_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_mone_vary_invmask( -; CHECK-NEXT: [[MASK_NOT:%.*]] = xor <2 x i4> [[MASK:%.*]], +; CHECK-NEXT: [[MASK_NOT:%.*]] = xor <2 x i4> [[MASK:%.*]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[Y:%.*]], [[MASK_NOT]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -107,9 +107,9 @@ define <2 x i4> @in_constant_mone_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_6_vary_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %notmask = xor <2 x i4> %mask, @@ -268,7 +268,7 @@ define <2 x i4> @c_1_1_1 (<2 x i4> %m) { define <2 x i4> @commutativity_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_varx_6_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -282,9 +282,9 @@ define <2 x i4> @commutativity_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %ma define <2 x i4> @commutativity_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_6_vary_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %notmask = xor <2 x i4> %mask, @@ -320,7 +320,7 @@ define <2 x i4> @n_oneuse_D_is_ok (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_oneuse_A( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] @@ -337,7 +337,7 @@ define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_oneuse_AD( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] @@ -358,7 +358,7 @@ define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m) { ; CHECK-LABEL: @n_third_var( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Z:%.*]] @@ -374,7 +374,7 @@ define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m define <2 x i4> @n_third_var_const(<2 x i4> %x, <2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @n_third_var_const( -; CHECK-NEXT: [[NOTMASK:%.*]] = xor <2 x i4> [[MASK:%.*]], +; CHECK-NEXT: [[NOTMASK:%.*]] = xor <2 x i4> [[MASK:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[NOTMASK]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], @@ -391,7 +391,7 @@ define <2 x i4> @n_third_var_const(<2 x i4> %x, <2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @n_badxor_splat (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_badxor_splat( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] diff --git a/llvm/test/Transforms/InstCombine/is_fpclass.ll b/llvm/test/Transforms/InstCombine/is_fpclass.ll index 24a331dfb48bb01..c1809b8bec61cb4 100644 --- a/llvm/test/Transforms/InstCombine/is_fpclass.ll +++ b/llvm/test/Transforms/InstCombine/is_fpclass.ll @@ -504,7 +504,7 @@ define i1 @test_class_is_pinf_or_nan_f32(float %x) { define <2 x i1> @test_class_is_pinf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_pinf_v2f32( -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 512) ; fcPosInf @@ -531,7 +531,7 @@ define i1 @test_class_is_ninf_or_nan_f32(float %x) { define <2 x i1> @test_class_is_ninf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_ninf_v2f32( -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0xFFF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 4) ; fcNegInf @@ -551,7 +551,7 @@ define i1 @test_class_is_inf_f32(float %x) { define <2 x i1> @test_class_is_inf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_inf_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]]) -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 516) ; fcInf @@ -1300,7 +1300,7 @@ define i1 @test_no_fold_and_class_f32_0(float %a, float %b) { define <2 x i1> @test_fold_and_class_v2f32(<2 x float> %a) { ; CHECK-LABEL: @test_fold_and_class_v2f32( -; CHECK-NEXT: [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], +; CHECK-NEXT: [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], splat (float 0xFFF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CLASS1]] ; %class0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 7) diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll index 7ace998556c703b..df697e686d986c9 100644 --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -16,7 +16,7 @@ define i1 @is_pow2or0_negate_op(i32 %x) { define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2or0_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -40,7 +40,7 @@ define i1 @is_pow2or0_decrement_op(i8 %x) { define <2 x i1> @is_pow2or0_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2or0_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i8> [[TMP1]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %dec = add <2 x i8> %x, @@ -64,7 +64,7 @@ define i1 @isnot_pow2or0_negate_op(i32 %x) { define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -88,7 +88,7 @@ define i1 @isnot_pow2or0_decrement_op(i8 %x) { define <2 x i1> @isnot_pow2or0_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2or0_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %dec = add <2 x i8> %x, @@ -312,7 +312,7 @@ define i1 @is_pow2_ctpop_extra_uses_logical(i32 %x) { define <2 x i1> @is_pow2_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -519,7 +519,7 @@ define i1 @isnot_pow2_ctpop_extra_uses_logical(i32 %x) { define <2 x i1> @isnot_pow2_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -656,7 +656,7 @@ define i1 @is_pow2_negate_op_logical(i32 %x) { define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -698,7 +698,7 @@ define i1 @is_pow2_decrement_op_logical(i8 %x) { define <2 x i1> @is_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, @@ -740,7 +740,7 @@ define i1 @isnot_pow2_negate_op_logical(i32 %x) { define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -782,7 +782,7 @@ define i1 @isnot_pow2_decrement_op_logical(i8 %x) { define <2 x i1> @isnot_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, @@ -837,7 +837,7 @@ define i1 @is_pow2or0_ctpop_logical(i32 %x) { define <2 x i1> @is_pow2or0_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2or0_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ult <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ult <2 x i8> [[T0]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -1030,7 +1030,7 @@ define i1 @isnot_pow2nor0_ctpop_logical(i32 %x) { define <2 x i1> @isnot_pow2nor0_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2nor0_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -1170,7 +1170,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred2_logical(i32 %x) { define <2 x i1> @isnot_pow2nor0_wrong_pred3_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2nor0_wrong_pred3_ctpop_commute_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) %cmp = icmp ne <2 x i8> %t0, @@ -1492,7 +1492,7 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) { define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], splat (i32 -65) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1504,9 +1504,9 @@ define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { define <2 x i1> @not_pow2_or_z_known_bits_fail_wrong_cmp(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits_fail_wrong_cmp( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], splat (i32 64) ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 1, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i32> [[CNT]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i32> [[CNT]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = or <2 x i32> %xin, diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index aa24ec3905f1338..c9e7cbae3d9395f 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -939,10 +939,10 @@ define i1 @extract_value_uadd_fail2(<2 x i8> %xx, <2 x i8> %yy, i32 %idx) { define i1 @extract_value_uadd_fail3(<2 x i8> %xx, <2 x i8> %yy) { ; CHECK-LABEL: @extract_value_uadd_fail3( -; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], -; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], -; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], splat (i8 127) +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127) +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], splat (i8 1) +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], splat (i8 1) ; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) ; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 diff --git a/llvm/test/Transforms/InstCombine/ldexp-ext.ll b/llvm/test/Transforms/InstCombine/ldexp-ext.ll index 58710005d6cce09..3fac72a7858e484 100644 --- a/llvm/test/Transforms/InstCombine/ldexp-ext.ll +++ b/llvm/test/Transforms/InstCombine/ldexp-ext.ll @@ -47,7 +47,7 @@ define double @ldexp_zext_double_fast_math(double %x, i1 %bool) { define <2 x float> @ldexp_zext_float_vector(<2 x float> %x, <2 x i1> %bool) { ; CHECK-LABEL: @ldexp_zext_float_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; @@ -102,7 +102,7 @@ define double @ldexp_sext_double_fast_math(double %x, i1 %bool) { define <2 x float> @ldexp_sext_float_vector(<2 x float> %x, <2 x i1> %bool) { ; CHECK-LABEL: @ldexp_sext_float_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 5.000000e-01), <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; diff --git a/llvm/test/Transforms/InstCombine/ldexp.ll b/llvm/test/Transforms/InstCombine/ldexp.ll index 7ae483cbda0e227..8908d476b4a2c0b 100644 --- a/llvm/test/Transforms/InstCombine/ldexp.ll +++ b/llvm/test/Transforms/InstCombine/ldexp.ll @@ -830,7 +830,7 @@ define float @ldexp_127(float %x) { define <2 x float> @ldexp_3_vector(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @ldexp_3_vector ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; %ldexp = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> ) diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index dbc68044c11a0db..b3fa3dae80379f4 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -347,7 +347,7 @@ define i1 @load_after_memset_1_i1(ptr %a) { define <4 x i8> @load_after_memset_1_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 1) ; call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) %v = load <4 x i8>, ptr %a diff --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll index 58a9c8163c077b9..918ea605a10bfdc 100644 --- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll +++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll @@ -21,8 +21,8 @@ define void @combine_masked_load_store_from_constant_array(ptr %ptr) { define void @combine_masked_expandload_compressstore_from_constant_array(ptr %ptr) { ; CHECK-LABEL: @combine_masked_expandload_compressstore_from_constant_array( -; CHECK-NEXT: [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> , <10 x i64> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> splat (i1 true), <10 x i64> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %1 = alloca [10 x i64] diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll index bfa636e470bb450..374115953145d85 100644 --- a/llvm/test/Transforms/InstCombine/log-pow.ll +++ b/llvm/test/Transforms/InstCombine/log-pow.ll @@ -137,7 +137,7 @@ define double @log10_exp(double %x) { define <2 x float> @logv_exp2v(<2 x float> %x) { ; CHECK-LABEL: @logv_exp2v( -; CHECK-NEXT: [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 0x3FE62E4300000000) ; CHECK-NEXT: ret <2 x float> [[MUL]] ; %exp = call fast <2 x float> @llvm.exp2.v2f32(<2 x float> %x) diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index cf0dc3503288461..9e0c98bb3400687 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -345,7 +345,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6 ; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]] ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]] diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 62a63839704a448..1b6e816d2e624ea 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -350,7 +350,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6 ; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]] ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]] @@ -764,7 +764,7 @@ define <8 x i3> @bitcast_vec_cond_commute1(<3 x i1> noundef %cond, <8 x i3> %pc, ; CHECK-NEXT: [[C:%.*]] = mul <8 x i3> [[PC:%.*]], [[PC]] ; CHECK-NEXT: [[S:%.*]] = sext <3 x i1> [[COND:%.*]] to <3 x i8> ; CHECK-NEXT: [[T9:%.*]] = bitcast <3 x i8> [[S]] to <8 x i3> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <8 x i3> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <8 x i3> [[T9]], splat (i3 -1) ; CHECK-NEXT: [[T11:%.*]] = and <8 x i3> [[C]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <8 x i3> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <8 x i3> [[T11]], [[T12]] @@ -830,7 +830,7 @@ define <2 x i16> @bitcast_vec_cond_commute3(<4 x i8> %cond, <2 x i16> %pc, <2 x define <2 x i64> @bitcast_fp_vec_cond(<2 x double> noundef %s, <2 x i64> %c, <2 x i64> %d) { ; CHECK-LABEL: @bitcast_fp_vec_cond( ; CHECK-NEXT: [[T9:%.*]] = bitcast <2 x double> [[S:%.*]] to <2 x i64> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1) ; CHECK-NEXT: [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]] @@ -850,7 +850,7 @@ define <2 x i64> @bitcast_int_vec_cond(i1 noundef %b, <2 x i64> %c, <2 x i64> %d ; CHECK-LABEL: @bitcast_int_vec_cond( ; CHECK-NEXT: [[S:%.*]] = sext i1 [[B:%.*]] to i128 ; CHECK-NEXT: [[T9:%.*]] = bitcast i128 [[S]] to <2 x i64> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1) ; CHECK-NEXT: [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]] @@ -1043,7 +1043,7 @@ define i1 @not_d_bools_commute11(i1 %c, i1 %x, i1 %y) { define <2 x i1> @not_d_bools_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_d_bools_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[X:%.*]], <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1056,7 +1056,7 @@ define <2 x i1> @not_d_bools_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { define <2 x i1> @not_d_bools_vector_poison(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_d_bools_vector_poison( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[X:%.*]], <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1181,8 +1181,8 @@ define i1 @logical_and_or_with_common_not_op_variant3(i1 %a, i1 %b) { ; A & (~A | B) --> A & B define <2 x i1> @logical_and_or_with_common_not_op_variant4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_and_or_with_common_not_op_variant4( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[NOT]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[NOT]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[A]], <2 x i1> [[B]], <2 x i1> zeroinitializer ; CHECK-NEXT: call void @use2(<2 x i1> [[A]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[B]]) @@ -1244,7 +1244,7 @@ define i1 @logical_or_and_with_common_not_op_variant1(i1 %a, i1 %b) { ; A | (~A & B) --> A | B define <2 x i1> @logical_or_and_with_common_not_op_variant2(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_or_and_with_common_not_op_variant2( -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %not = xor <2 x i1> %a, @@ -1272,9 +1272,9 @@ define i1 @logical_or_and_with_common_not_op_variant3(i1 %a, i1 %b) { ; A | (~A & B) --> A | B define <2 x i1> @logical_or_and_with_common_not_op_variant4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_or_and_with_common_not_op_variant4( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[NOT]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A]], <2 x i1> , <2 x i1> [[B]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A]], <2 x i1> splat (i1 true), <2 x i1> [[B]] ; CHECK-NEXT: call void @use2(<2 x i1> [[A]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[B]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[AND]]) diff --git a/llvm/test/Transforms/InstCombine/low-bit-splat.ll b/llvm/test/Transforms/InstCombine/low-bit-splat.ll index a86ab0359afd78f..ac069fb138dbe02 100644 --- a/llvm/test/Transforms/InstCombine/low-bit-splat.ll +++ b/llvm/test/Transforms/InstCombine/low-bit-splat.ll @@ -31,7 +31,7 @@ define i16 @t1_otherbitwidth(i16 %x) { ; Basic positive vector tests define <2 x i8> @t2_vec(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll index e1c441e9c0b446e..31bf6231f7d6b14 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_negC_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_negC_eq( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 0166680309ea8c9..da3ed49c83d4ffd 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_signbit_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> % define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> % define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll index a77ad3a7ea74ee8..03188c0c590c437 100644 --- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll +++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll @@ -64,7 +64,7 @@ define i16 @n3(i8 %x) { define <2 x i16> @t4_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t4_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -76,7 +76,7 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) { define <2 x i16> @t5_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @t5_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -143,7 +143,7 @@ define <2 x i16> @t9_extrause1_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @t9_extrause1_vec_poison( ; CHECK-NEXT: [[A:%.*]] = lshr <2 x i8> [[X:%.*]], ; CHECK-NEXT: call void @usevec8(<2 x i8> [[A]]) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -188,7 +188,7 @@ define <2 x i16> @t11_extrause2_vec_poison(<2 x i8> %x) { define <2 x i10> @wide_source_shifted_signbit(<2 x i32> %x) { ; CHECK-LABEL: @wide_source_shifted_signbit( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 24) ; CHECK-NEXT: [[C:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i10> ; CHECK-NEXT: ret <2 x i10> [[C]] ; diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 4360714c78caa67..00626015d2ed76e 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -73,7 +73,7 @@ define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctpop_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -153,8 +153,8 @@ define i8 @lshr_exact(i8 %x) { define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], splat (i8 63) ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl <2 x i8> %x, @@ -165,7 +165,7 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec_nuw( -; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl nuw <2 x i8> %x, @@ -190,9 +190,9 @@ define i8 @shl_add(i8 %x, i8 %y) { define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @shl_add_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], splat (i8 3) ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], splat (i8 31) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization @@ -289,7 +289,7 @@ define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) { define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) { ; CHECK-LABEL: @smear_sign_and_widen_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], splat (i6 2) ; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[HIBIT]] ; @@ -324,7 +324,7 @@ define i32 @fake_sext_but_should_not_change_type(i3 %x) { define <2 x i8> @fake_sext_splat(<2 x i3> %x) { ; CHECK-LABEL: @fake_sext_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], splat (i3 2) ; CHECK-NEXT: [[SH:%.*]] = zext nneg <2 x i3> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -337,7 +337,7 @@ define <2 x i8> @fake_sext_splat(<2 x i3> %x) { define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @narrow_lshr_constant( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[SH:%.*]] = zext nneg <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[SH]] ; @@ -359,7 +359,7 @@ define i32 @mul_splat_fold(i32 %x) { define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) { ; CHECK-LABEL: @mul_splat_fold_vec( -; CHECK-NEXT: [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], +; CHECK-NEXT: [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], splat (i14 129) ; CHECK-NEXT: call void @usevec(<3 x i14> [[M]]) ; CHECK-NEXT: ret <3 x i14> [[X]] ; @@ -787,7 +787,7 @@ define i32 @negative_and_odd(i32 %x) { define <2 x i7> @negative_and_odd_vec(<2 x i7> %x) { ; CHECK-LABEL: @negative_and_odd_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i7> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i7> [[X:%.*]], splat (i7 6) ; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; @@ -851,7 +851,7 @@ define i12 @trunc_sandwich(i32 %x) { define <2 x i12> @trunc_sandwich_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @trunc_sandwich_splat_vec( -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <2 x i32> [[SUM_SHIFT]] to <2 x i12> ; CHECK-NEXT: ret <2 x i12> [[R1]] ; @@ -947,9 +947,9 @@ define i12 @trunc_sandwich_use1(i32 %x) { define <3 x i9> @trunc_sandwich_splat_vec_use1(<3 x i14> %x) { ; CHECK-LABEL: @trunc_sandwich_splat_vec_use1( -; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], splat (i14 6) ; CHECK-NEXT: call void @usevec(<3 x i14> [[SH]]) -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], splat (i14 11) ; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <3 x i14> [[SUM_SHIFT]] to <3 x i9> ; CHECK-NEXT: ret <3 x i9> [[R1]] ; @@ -1089,7 +1089,7 @@ define <3 x i14> @lshr_sext_i1_to_i14_splat_vec_use1(<3 x i1> %a) { ; CHECK-LABEL: @lshr_sext_i1_to_i14_splat_vec_use1( ; CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[A:%.*]] to <3 x i14> ; CHECK-NEXT: call void @usevec(<3 x i14> [[SEXT]]) -; CHECK-NEXT: [[LSHR:%.*]] = select <3 x i1> [[A]], <3 x i14> , <3 x i14> zeroinitializer +; CHECK-NEXT: [[LSHR:%.*]] = select <3 x i1> [[A]], <3 x i14> splat (i14 1023), <3 x i14> zeroinitializer ; CHECK-NEXT: ret <3 x i14> [[LSHR]] ; %sext = sext <3 x i1> %a to <3 x i14> @@ -1269,7 +1269,7 @@ define <2 x i64> @narrow_bswap_splat_poison_elt(<2 x i16> %x) { define <2 x i64> @narrow_bswap_overshift(<2 x i32> %x) { ; CHECK-LABEL: @narrow_bswap_overshift( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: [[S:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[S]] ; @@ -1320,7 +1320,7 @@ define i8 @not_signbit(i8 %x) { define <2 x i6> @not_signbit_vec(<2 x i6> %x) { ; CHECK-LABEL: @not_signbit_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], splat (i6 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-add.ll b/llvm/test/Transforms/InstCombine/masked-merge-add.ll index 5ef53ad5150137d..d78a9ee59c714b0 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-add.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-add.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll index 639478dfcc6fe82..399d6d4ac07d3f3 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll @@ -31,7 +31,7 @@ define i32 @p(i32 %x, i32 %y, i32 %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) { ; CHECK-LABEL: @p_splatvec( -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], splat (i32 -1) ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], [[NEG]] ; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], [[M]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]] @@ -78,8 +78,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -65281) +; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 65280) ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-or.ll b/llvm/test/Transforms/InstCombine/masked-merge-or.ll index dd2ac6dfe510916..5e41279dd26b90f 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-or.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-or.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll index 7ed1f3fdfdab64a..463f220fcbb6dd1 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll index aee8f400ff43a18..155a7fd0590e4a5 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll @@ -191,7 +191,7 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) { define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ; CHECK-LABEL: @gather_onemask( -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> , <2 x double> %passthru) diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 15ffc881b5731b7..0f8b06e124211f5 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -191,7 +191,7 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) { define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ; CHECK-LABEL: @gather_onemask( -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> , <2 x double> %passthru) @@ -363,7 +363,7 @@ define void @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_m ; CHECK-LABEL: @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_mask( ; CHECK-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x ptr> [[INPTR:%.*]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[SRC:%.*]], align 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[WIDE_LOAD]], <4 x ptr> [[BROADCAST]], i32 2, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[WIDE_LOAD]], <4 x ptr> [[BROADCAST]], i32 2, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %broadcast= shufflevector <4 x ptr> %inPtr, <4 x ptr> poison, <4 x i32> zeroinitializer @@ -412,7 +412,7 @@ define <2 x i64> @gather_v2i64_uniform_ptrs_all_active_mask(ptr %src) { define <2 x i64> @negative_gather_v2i64_non_uniform_ptrs_all_active_mask(<2 x ptr> %inVal, ptr %src ) { ; CHECK-LABEL: @negative_gather_v2i64_non_uniform_ptrs_all_active_mask( ; CHECK-NEXT: [[INSERT_VALUE:%.*]] = insertelement <2 x ptr> [[INVAL:%.*]], ptr [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[INSERT_VALUE]], i32 8, <2 x i1> , <2 x i64> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[INSERT_VALUE]], i32 8, <2 x i1> splat (i1 true), <2 x i64> poison) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %insert.value = insertelement <2 x ptr> %inVal, ptr %src, i32 1 diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll index acce89b38be83d8..b57f84b41033fd2 100644 --- a/llvm/test/Transforms/InstCombine/max-of-nots.ll +++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll @@ -4,7 +4,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[MIN]] ; %notx = xor <2 x i32> %x, @@ -17,7 +17,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[MIN]] ; %notx = xor <2 x i32> %x, @@ -242,7 +242,7 @@ define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @max_of_nots_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[SMAX96]] ; %c0 = icmp sgt <2 x i32> %y, zeroinitializer @@ -258,7 +258,7 @@ define <2 x i37> @max_of_nots_weird_type_vec(<2 x i37> %x, <2 x i37> %y) { ; CHECK-LABEL: @max_of_nots_weird_type_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i37> @llvm.smax.v2i37(<2 x i37> [[Y:%.*]], <2 x i37> zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i37> @llvm.smin.v2i37(<2 x i37> [[TMP1]], <2 x i37> [[X:%.*]]) -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i37> [[TMP2]], +; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i37> [[TMP2]], splat (i37 -1) ; CHECK-NEXT: ret <2 x i37> [[SMAX96]] ; %c0 = icmp sgt <2 x i37> %y, zeroinitializer @@ -324,7 +324,7 @@ define i32 @min_of_max_swap(i32 %a) { define <2 x i32> @max_of_min_vec(<2 x i32> %a) { ; CHECK-LABEL: @max_of_min_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %not_a = xor <2 x i32> %a, %c0 = icmp sgt <2 x i32> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/maximum.ll b/llvm/test/Transforms/InstCombine/maximum.ll index 7455d3b7b924ccb..c79449472235b55 100644 --- a/llvm/test/Transforms/InstCombine/maximum.ll +++ b/llvm/test/Transforms/InstCombine/maximum.ll @@ -217,7 +217,7 @@ define float @maximum_f32_1_maximum_p0_val(float %x) { define <2 x float> @maximum_f32_1_maximum_val_p0_val_v2f32(<2 x float> %x) { ; CHECK-LABEL: @maximum_f32_1_maximum_val_p0_val_v2f32( -; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> ) +; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[Z]] ; %y = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll index f26a5300febd859..67fb7a90b3d49e1 100644 --- a/llvm/test/Transforms/InstCombine/maxnum.ll +++ b/llvm/test/Transforms/InstCombine/maxnum.ll @@ -217,7 +217,7 @@ define float @maxnum_f32_1_maxnum_p0_val(float %x) { define <2 x float> @maxnum_f32_1_maxnum_val_p0_val_v2f32(<2 x float> %x) { ; CHECK-LABEL: @maxnum_f32_1_maxnum_val_p0_val_v2f32( -; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> ) +; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[Z]] ; %y = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll index 4e8ef07dba04c84..4f658273ce09829 100644 --- a/llvm/test/Transforms/InstCombine/merge-icmp.ll +++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll @@ -38,7 +38,7 @@ define i1 @and_test1_logical(ptr %x) { define <2 x i1> @and_test1_vector(ptr %x) { ; CHECK-LABEL: @and_test1_vector( ; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 17791) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %load = load <2 x i16>, ptr %x, align 4 @@ -83,7 +83,7 @@ define i1 @and_test2_logical(ptr %x) { define <2 x i1> @and_test2_vector(ptr %x) { ; CHECK-LABEL: @and_test2_vector( ; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 32581) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %load = load <2 x i16>, ptr %x, align 4 @@ -123,7 +123,7 @@ define i1 @or_basic_commuted(i16 %load) { define <2 x i1> @or_vector(<2 x i16> %load) { ; CHECK-LABEL: @or_vector( -; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], +; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], splat (i16 17791) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %trunc = trunc <2 x i16> %load to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll index d2c2e9018792bdd..db73974b7bf6adb 100644 --- a/llvm/test/Transforms/InstCombine/min-positive.ll +++ b/llvm/test/Transforms/InstCombine/min-positive.ll @@ -97,7 +97,7 @@ define i1 @maybe_not_positive(i32 %other) { define <2 x i1> @maybe_not_positive_vec(<2 x i32> %x, <2 x i32> %other) { ; CHECK-LABEL: @maybe_not_positive_vec( -; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[SEL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[NOTNEG]], <2 x i32> [[OTHER:%.*]]) ; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[SEL]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TEST]] diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index ec1c7aff4096616..ccdf4400b16b54b 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -226,7 +226,7 @@ define i32 @test68(i32 %x) { define <2 x i32> @test68vec(<2 x i32> %x) { ; CHECK-LABEL: @test68vec( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11)) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp slt <2 x i32> , %x @@ -290,7 +290,7 @@ define i32 @test72(i32 %x) { define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11)) ; CHECK-NEXT: ret <2 x i32> [[RETVAL]] ; %cmp = icmp sgt <2 x i32> %x, @@ -711,7 +711,7 @@ define <2 x i8> @min_through_cast_vec1(<2 x i32> %x) { define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) { ; CHECK-LABEL: @min_through_cast_vec2( -; CHECK-NEXT: [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 511)) ; CHECK-NEXT: [[RES:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[RES]] ; @@ -963,8 +963,8 @@ define i32 @add_umin_extra_use(i32 %x, ptr %p) { define <2 x i16> @add_umin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_umin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225)) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], splat (i16 15) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %a = add nuw <2 x i16> %x, @@ -1075,8 +1075,8 @@ define i32 @add_umax_extra_use(i32 %x, ptr %p) { define <2 x i33> @add_umax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_umax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.umax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) -; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.umax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> splat (i33 235)) +; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP1]], splat (i33 5) ; CHECK-NEXT: ret <2 x i33> [[R]] ; %a = add nuw <2 x i33> %x, @@ -1210,8 +1210,8 @@ define i32 @add_smin_extra_use(i32 %x, ptr %p) { define <2 x i16> @add_smin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_smin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225)) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP1]], splat (i16 15) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %a = add nsw <2 x i16> %x, @@ -1318,8 +1318,8 @@ define i32 @add_smax_extra_use(i32 %x, ptr %p) { define <2 x i33> @add_smax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_smax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.smax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.smax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> splat (i33 235)) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP1]], splat (i33 5) ; CHECK-NEXT: ret <2 x i33> [[R]] ; %a = add nsw <2 x i33> %x, @@ -1470,7 +1470,7 @@ define i32 @test_smin_umin2(i32 %x) { define <2 x i32> @test_smin_umin_vec(<2 x i32> %x) { ; CHECK-LABEL: @test_smin_umin_vec( -; CHECK-NEXT: [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 10)) ; CHECK-NEXT: ret <2 x i32> [[UMIN]] ; %smin = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %x, <2 x i32> ) diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index a76f0f84ba34018..0b7127f82b61251 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -398,7 +398,7 @@ define i8 @smax_of_nots(i8 %x, i8 %y) { define <3 x i8> @smin_of_nots(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @smin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], +; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %notx = xor <3 x i8> %x, @@ -478,7 +478,7 @@ define i8 @smax_of_not_and_const(i8 %x) { define <3 x i8> @smin_of_not_and_const(<3 x i8> %x) { ; CHECK-LABEL: @smin_of_not_and_const( ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], +; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %notx = xor <3 x i8> %x, @@ -774,8 +774,8 @@ define i8 @clamp_two_vals_smax_smin(i8 %x) { define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) { ; CHECK-LABEL: @clamp_two_vals_smin_smax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> , <3 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], splat (i8 41) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> splat (i8 42), <3 x i8> splat (i8 41) ; CHECK-NEXT: ret <3 x i8> [[R]] ; %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %x, <3 x i8> ) @@ -1933,8 +1933,8 @@ define i8 @smax_offset_uses(i8 %x) { define <3 x i8> @smin_offset(<3 x i8> %x) { ; CHECK-LABEL: @smin_offset( -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 -127)) +; CHECK-NEXT: [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], splat (i8 124) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %a = add nsw nuw <3 x i8> %x, @@ -1996,8 +1996,8 @@ define i8 @smin_offset_uses(i8 %x) { define <3 x i8> @umax_offset(<3 x i8> %x) { ; CHECK-LABEL: @umax_offset( -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 3)) +; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], splat (i8 127) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %a = add nsw nuw <3 x i8> %x, @@ -2509,8 +2509,8 @@ entry: define <3 x i8> @fold_umax_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { ; CHECK-LABEL: @fold_umax_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], -; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], +; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]] ; CHECK-NEXT: ret <3 x i8> [[SUB]] ; @@ -2538,7 +2538,7 @@ entry: define <3 x i8> @fold_umin_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { ; CHECK-LABEL: @fold_umin_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 3) ; entry: %a1 = or <3 x i8> %a, diff --git a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll index 8b896632b8adcc7..e688391f72e631a 100644 --- a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll +++ b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll @@ -15,7 +15,7 @@ declare void @barrier() define <2 x i8> @umax_xor_Cpow2(<2 x i8> %x) { ; CHECK-LABEL: @umax_xor_Cpow2( -; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -45,7 +45,7 @@ define i8 @smax_xor_Cpow2_pos(i8 %x) { define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) { ; CHECK-LABEL: @smin_xor_Cpow2_pos( -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -17) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -55,7 +55,7 @@ define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) { define <2 x i8> @smax_xor_Cpow2_neg(<2 x i8> %x) { ; CHECK-LABEL: @smax_xor_Cpow2_neg( -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -91,7 +91,7 @@ define <2 x i8> @umin_xor_pow2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @umin_xor_pow2( ; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]] ; CHECK-NEXT: [[YP2:%.*]] = and <2 x i8> [[Y]], [[NY]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[YP2]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[YP2]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/modulo.ll b/llvm/test/Transforms/InstCombine/modulo.ll index 2988c524faedcab..7a9584d69d3bf10 100644 --- a/llvm/test/Transforms/InstCombine/modulo.ll +++ b/llvm/test/Transforms/InstCombine/modulo.ll @@ -16,7 +16,7 @@ define i32 @modulo2(i32 %x) { define <2 x i32> @modulo2_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo2_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, @@ -43,9 +43,9 @@ define i32 @modulo3(i32 %x) { define <2 x i32> @modulo3_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo3_vec( -; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer -; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 3), <2 x i32> zeroinitializer ; CHECK-NEXT: [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]] ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; @@ -70,7 +70,7 @@ define i32 @modulo4(i32 %x) { define <2 x i32> @modulo4_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo4_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, @@ -97,9 +97,9 @@ define i32 @modulo7(i32 %x) { define <2 x i32> @modulo7_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo7_vec( -; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer -; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer ; CHECK-NEXT: [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]] ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; @@ -124,7 +124,7 @@ define i32 @modulo32(i32 %x) { define <2 x i32> @modulo32_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo32_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll index 94f41236c6f8670..997758af62a5431 100644 --- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll @@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) { define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) { ; CHECK-LABEL: @pow2_multiplier_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = mul <2 x i32> %A, @@ -492,7 +492,7 @@ define i55 @neg_mul_constant_apint(i55 %A) { define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { ; CHECK-LABEL: @neg_mul_constant_vec( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <3 x i8> [[B]] ; %A = sub <3 x i8> zeroinitializer, %a @@ -502,7 +502,7 @@ define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) { ; CHECK-LABEL: @neg_mul_constant_vec_weird( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], splat (i4 -5) ; CHECK-NEXT: ret <3 x i4> [[B]] ; %A = sub <3 x i4> zeroinitializer, %a @@ -593,7 +593,7 @@ define i32 @test32(i32 %X) { define <2 x i32> @test32vec(<2 x i32> %X) { ; CHECK-LABEL: @test32vec( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -611,7 +611,7 @@ define i32 @test33(i32 %X) { define <2 x i32> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -767,7 +767,7 @@ define i32 @negate_if_false_nuw(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -810,8 +810,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -956,7 +956,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) { define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @mulsub1_vec( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 @@ -999,8 +999,8 @@ define i32 @mulsub2(i32 %a0) { define <2 x i32> @mulsub2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @mulsub2_vec( -; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> , %a0 @@ -1043,8 +1043,8 @@ define i32 @muladd2(i32 %a0) { define <2 x i32> @muladd2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @muladd2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll index fd8ad88764f5922..48d44c05f39dffd 100644 --- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll +++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll @@ -103,8 +103,8 @@ define i8 @one_demanded_bit(i8 %x) { define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) { ; CHECK-LABEL: @one_demanded_bit_splat( -; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], +; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], splat (i8 32) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %m = mul <2 x i8> %x, ; 0b1010_0000 @@ -134,7 +134,7 @@ define i33 @squared_one_demanded_low_bit(i33 %x) { define <2 x i8> @squared_one_demanded_low_bit_splat(<2 x i8> %x) { ; CHECK-LABEL: @squared_one_demanded_low_bit_splat( -; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -2) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %mul = mul <2 x i8> %x, %x @@ -154,8 +154,8 @@ define i33 @squared_demanded_2_low_bits(i33 %x) { define <2 x i8> @squared_demanded_2_low_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @squared_demanded_2_low_bits_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], splat (i8 -4) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %mul = mul <2 x i8> %x, %x @@ -240,7 +240,7 @@ define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) { ; Vector tests define <2 x i64> @vector_mul_bit_x0_y0(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @vector_mul_bit_x0_y0( -; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], splat (i64 1) ; CHECK-NEXT: [[MUL:%.*]] = and <2 x i64> [[X:%.*]], [[AND2]] ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 8c528e340bc6ce3..e38ab1b9622b2cc 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) { define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) { ; CHECK-LABEL: @pow2_multiplier_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = mul <2 x i32> %A, @@ -161,7 +161,7 @@ define i5 @shl1_increment(i5 %x, i5 %y) { define <3 x i5> @shl1_nuw_increment_commute(<3 x i5> %x, <3 x i5> noundef %p) { ; CHECK-LABEL: @shl1_nuw_increment_commute( -; CHECK-NEXT: [[Y:%.*]] = ashr <3 x i5> [[P:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr <3 x i5> [[P:%.*]], splat (i5 1) ; CHECK-NEXT: [[MULSHL:%.*]] = shl nuw <3 x i5> [[Y]], [[X:%.*]] ; CHECK-NEXT: [[M1:%.*]] = add nuw <3 x i5> [[MULSHL]], [[Y]] ; CHECK-NEXT: ret <3 x i5> [[M1]] @@ -989,7 +989,7 @@ define i32 @PR57278_or_disjoint_nsw(i32 %a) { define <2 x i32> @PR57278_shl_vec(<2 x i32> %v1) { ; CHECK-LABEL: @PR57278_shl_vec( ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <2 x i32> [[V1:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], splat (i32 9) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl = shl nuw <2 x i32> %v1, @@ -1105,7 +1105,7 @@ define i55 @neg_mul_constant_apint(i55 %A) { define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { ; CHECK-LABEL: @neg_mul_constant_vec( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <3 x i8> [[B]] ; %A = sub <3 x i8> zeroinitializer, %a @@ -1115,7 +1115,7 @@ define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) { ; CHECK-LABEL: @neg_mul_constant_vec_weird( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], splat (i4 -5) ; CHECK-NEXT: ret <3 x i4> [[B]] ; %A = sub <3 x i4> zeroinitializer, %a @@ -1175,7 +1175,7 @@ define i32 @test32(i32 %X) { define <2 x i32> @test32vec(<2 x i32> %X) { ; CHECK-LABEL: @test32vec( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -1193,7 +1193,7 @@ define i32 @test33(i32 %X) { define <2 x i32> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -1479,7 +1479,7 @@ define i32 @negate_if_false(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1522,8 +1522,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1735,7 +1735,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) { define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @mulsub1_vec( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 @@ -1778,8 +1778,8 @@ define i32 @mulsub2(i32 %a0) { define <2 x i32> @mulsub2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @mulsub2_vec( -; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> , %a0 @@ -1846,8 +1846,8 @@ define i32 @muladd2(i32 %a0) { define <2 x i32> @muladd2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @muladd2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, @@ -1976,7 +1976,7 @@ define <2 x i14> @zext_negpow2_vec(<2 x i5> %x) { ; CHECK-LABEL: @zext_negpow2_vec( ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i5> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[X_NEG_Z:%.*]] = zext <2 x i5> [[X_NEG]] to <2 x i14> -; CHECK-NEXT: [[R:%.*]] = shl <2 x i14> [[X_NEG_Z]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i14> [[X_NEG_Z]], splat (i14 11) ; CHECK-NEXT: ret <2 x i14> [[R]] ; %zx = zext <2 x i5> %x to <2 x i14> @@ -2015,7 +2015,7 @@ define <2 x i16> @sext_negpow2_vec(<2 x i8> %x) { ; CHECK-LABEL: @sext_negpow2_vec( ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[X_NEG_Z:%.*]] = zext <2 x i8> [[X_NEG]] to <2 x i16> -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], splat (i16 8) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %sx = sext <2 x i8> %x to <2 x i16> diff --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll index e4a21db8a6ece7a..ced30b6efa20468 100644 --- a/llvm/test/Transforms/InstCombine/mul_fold.ll +++ b/llvm/test/Transforms/InstCombine/mul_fold.ll @@ -542,8 +542,8 @@ define <2 x i8> @mul_v2i8_low(<2 x i8> %in0, <2 x i8> %in1) { define <2 x i8> @mul_v2i8_low_one_extra_user(<2 x i8> %in0, <2 x i8> %in1) { ; CHECK-LABEL: @mul_v2i8_low_one_extra_user( -; CHECK-NEXT: [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], -; CHECK-NEXT: [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], +; CHECK-NEXT: [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], splat (i8 4) +; CHECK-NEXT: [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], splat (i8 15) ; CHECK-NEXT: [[M01:%.*]] = mul nuw <2 x i8> [[IN1LO]], [[IN0HI]] ; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[M01]]) ; CHECK-NEXT: [[RETLO:%.*]] = mul <2 x i8> [[IN0]], [[IN1]] diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll index 0fcded7b0220a1c..14527e5b760d7e0 100644 --- a/llvm/test/Transforms/InstCombine/narrow-math.ll +++ b/llvm/test/Transforms/InstCombine/narrow-math.ll @@ -214,8 +214,8 @@ define i64 @sext_add_constant_extra_use(i32 %V) { define <2 x i64> @test5_splat(<2 x i32> %V) { ; CHECK-LABEL: @test5_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 1073741823) ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -227,7 +227,7 @@ define <2 x i64> @test5_splat(<2 x i32> %V) { define <2 x i64> @test5_vec(<2 x i32> %V) { ; CHECK-LABEL: @test5_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -253,8 +253,8 @@ define i64 @test6(i32 %V) { define <2 x i64> @test6_splat(<2 x i32> %V) { ; CHECK-LABEL: @test6_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 -1073741824) ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -266,7 +266,7 @@ define <2 x i64> @test6_splat(<2 x i32> %V) { define <2 x i64> @test6_vec(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -279,7 +279,7 @@ define <2 x i64> @test6_vec(<2 x i32> %V) { define <2 x i64> @test6_vec2(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec2( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -305,8 +305,8 @@ define i64 @test7(i32 %V) { define <2 x i64> @test7_splat(<2 x i32> %V) { ; CHECK-LABEL: @test7_splat( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], splat (i32 2147483647) ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -318,7 +318,7 @@ define <2 x i64> @test7_splat(<2 x i32> %V) { define <2 x i64> @test7_vec(<2 x i32> %V) { ; CHECK-LABEL: @test7_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -344,8 +344,8 @@ define i64 @test8(i32 %V) { define <2 x i64> @test8_splat(<2 x i32> %V) { ; CHECK-LABEL: @test8_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 32767) ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -357,7 +357,7 @@ define <2 x i64> @test8_splat(<2 x i32> %V) { define <2 x i64> @test8_vec(<2 x i32> %V) { ; CHECK-LABEL: @test8_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -370,7 +370,7 @@ define <2 x i64> @test8_vec(<2 x i32> %V) { define <2 x i64> @test8_vec2(<2 x i32> %V) { ; CHECK-LABEL: @test8_vec2( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -396,8 +396,8 @@ define i64 @test9(i32 %V) { define <2 x i64> @test9_splat(<2 x i32> %V) { ; CHECK-LABEL: @test9_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 -32767) ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -409,7 +409,7 @@ define <2 x i64> @test9_splat(<2 x i32> %V) { define <2 x i64> @test9_vec(<2 x i32> %V) { ; CHECK-LABEL: @test9_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -435,8 +435,8 @@ define i64 @test10(i32 %V) { define <2 x i64> @test10_splat(<2 x i32> %V) { ; CHECK-LABEL: @test10_splat( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], splat (i32 65535) ; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -448,7 +448,7 @@ define <2 x i64> @test10_splat(<2 x i32> %V) { define <2 x i64> @test10_vec(<2 x i32> %V) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], ; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -538,8 +538,8 @@ define i64 @test15(i32 %V) { define <2 x i64> @test15vec(<2 x i32> %V) { ; CHECK-LABEL: @test15vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = sub nsw <2 x i32> , [[ASHR]] +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = sub nsw <2 x i32> splat (i32 8), [[ASHR]] ; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; @@ -564,8 +564,8 @@ define i64 @test16(i32 %V) { define <2 x i64> @test16vec(<2 x i32> %V) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = sub nuw <2 x i32> , [[LSHR]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = sub nuw <2 x i32> splat (i32 -2), [[LSHR]] ; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; diff --git a/llvm/test/Transforms/InstCombine/narrow.ll b/llvm/test/Transforms/InstCombine/narrow.ll index 40229f8511f76dd..5fad7f07090b9f4 100644 --- a/llvm/test/Transforms/InstCombine/narrow.ll +++ b/llvm/test/Transforms/InstCombine/narrow.ll @@ -37,7 +37,7 @@ define i32 @shrink_xor(i64 %a) { define <2 x i32> @shrink_xor_vec(<2 x i64> %a) { ; CHECK-LABEL: @shrink_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %xor = xor <2 x i64> %a, diff --git a/llvm/test/Transforms/InstCombine/negated-bitmask.ll b/llvm/test/Transforms/InstCombine/negated-bitmask.ll index 918867818634764..95b03626df534ab 100644 --- a/llvm/test/Transforms/InstCombine/negated-bitmask.ll +++ b/llvm/test/Transforms/InstCombine/negated-bitmask.ll @@ -31,8 +31,8 @@ define i8 @sub_mask1_lshr(i8 %a0) { define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_vector_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], splat (i32 28) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %shift = lshr <4 x i32> %a0, @@ -44,7 +44,7 @@ define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) { define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_vector_nonuniform( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %shift = lshr <4 x i32> %a0, @@ -56,7 +56,7 @@ define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { define <4 x i32> @sub_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { ; CHECK-LABEL: @sub_mask1_lshr_vector_nonuniform( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[NEG:%.*]] = add nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: ret <4 x i32> [[NEG]] ; @@ -142,7 +142,7 @@ define i8 @neg_mask2_lshr_outofbounds(i8 %a0) { define <2 x i32> @neg_mask1_lshr_vector_var(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @neg_mask1_lshr_vector_var( ; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1) ; CHECK-NEXT: [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]] ; CHECK-NEXT: ret <2 x i32> [[NEG]] ; @@ -171,8 +171,8 @@ define i8 @neg_mask1_lshr_extrause_mask(i8 %a0) { ; Extra Use - shift define <2 x i32> @neg_mask1_lshr_extrause_lshr(<2 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_extrause_lshr( -; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], +; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], splat (i32 3) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1) ; CHECK-NEXT: [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]] ; CHECK-NEXT: call void @usev2i32(<2 x i32> [[SHIFT]]) ; CHECK-NEXT: ret <2 x i32> [[NEG]] @@ -200,7 +200,7 @@ define <2 x i64> @neg_signbit_use1(<2 x i32> %x) { ; CHECK-LABEL: @neg_signbit_use1( ; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[X:%.*]], ; CHECK-NEXT: call void @usev2i32(<2 x i32> [[S]]) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/nested-select.ll b/llvm/test/Transforms/InstCombine/nested-select.ll index d01dcf0793ade27..b8a7d4e8ab78944 100644 --- a/llvm/test/Transforms/InstCombine/nested-select.ll +++ b/llvm/test/Transforms/InstCombine/nested-select.ll @@ -515,7 +515,7 @@ define i8 @test_implied_true(i8 %x) { define <2 x i8> @test_implied_true_vec(<2 x i8> %x) { ; CHECK-LABEL: @test_implied_true_vec( ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 20) ; CHECK-NEXT: ret <2 x i8> [[SEL2]] ; %cmp1 = icmp slt <2 x i8> %x, @@ -570,10 +570,10 @@ define i8 @test_imply_fail(i8 %x) { define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @test_imply_type_mismatch( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0 -; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> -; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> +; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 5) +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> splat (i8 20) ; CHECK-NEXT: ret <2 x i8> [[SEL2]] ; %cmp1 = icmp slt <2 x i8> %x, @@ -584,6 +584,12 @@ define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) { } define <4 x i1> @test_dont_crash(i1 %cond, <4 x i1> %a, <4 x i1> %b) { +; CHECK-LABEL: @test_dont_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <4 x i1> [[A:%.*]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[AND:%.*]] = and <4 x i1> [[SEL]], [[B:%.*]] +; CHECK-NEXT: ret <4 x i1> [[AND]] +; entry: %sel = select i1 %cond, <4 x i1> %a, <4 x i1> zeroinitializer %and = and <4 x i1> %sel, %b diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll index 3679976d9dc3935..d693b9d8f855783 100644 --- a/llvm/test/Transforms/InstCombine/not.ll +++ b/llvm/test/Transforms/InstCombine/not.ll @@ -73,7 +73,7 @@ define i1 @not_cmp_constant(i32 %a) { define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) { ; CHECK-LABEL: @not_cmp_constant_vector( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -43) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %nota = xor <2 x i32> %a, @@ -114,7 +114,7 @@ define i8 @not_ashr_const(i8 %x) { define <2 x i8> @not_ashr_const_splat(<2 x i8> %x) { ; CHECK-LABEL: @not_ashr_const_splat( -; CHECK-NEXT: [[NOT:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = lshr <2 x i8> splat (i8 41), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[NOT]] ; %shr = ashr <2 x i8> , %x @@ -147,7 +147,7 @@ define i8 @not_lshr_const(i8 %x) { define <2 x i8> @not_lshr_const_splat(<2 x i8> %x) { ; CHECK-LABEL: @not_lshr_const_splat( -; CHECK-NEXT: [[NOT:%.*]] = ashr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = ashr <2 x i8> splat (i8 -43), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[NOT]] ; %shr = lshr <2 x i8> , %x @@ -180,7 +180,7 @@ define i32 @not_sub_extra_use(i32 %y, ptr %p) { define <2 x i32> @not_sub_splat(<2 x i32> %y) { ; CHECK-LABEL: @not_sub_splat( -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %y @@ -190,9 +190,9 @@ define <2 x i32> @not_sub_splat(<2 x i32> %y) { define <2 x i32> @not_sub_extra_use_splat(<2 x i32> %y, ptr %p) { ; CHECK-LABEL: @not_sub_extra_use_splat( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> splat (i32 123), [[Y:%.*]] ; CHECK-NEXT: store <2 x i32> [[S]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %y @@ -238,7 +238,7 @@ define i32 @not_add(i32 %x) { define <2 x i32> @not_add_splat(<2 x i32> %x) { ; CHECK-LABEL: @not_add_splat( -; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> splat (i32 -124), [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> %x, @@ -424,8 +424,8 @@ define i8 @not_or_neg(i8 %x, i8 %y) { define <3 x i5> @not_or_neg_commute_vec(<3 x i5> %x, <3 x i5> %p) { ; CHECK-LABEL: @not_or_neg_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <3 x i5> [[P:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i5> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i5> [[X:%.*]], splat (i5 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i5> [[Y]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = and <3 x i5> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -527,8 +527,8 @@ define i1 @not_select_bool_const4(i1 %x, i1 %y) { define <2 x i1> @not_logicalAnd_not_op0(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_logicalAnd_not_op0( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], -; CHECK-NEXT: [[NOTAND:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> , <2 x i1> [[Y_NOT]] +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) +; CHECK-NEXT: [[NOTAND:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[Y_NOT]] ; CHECK-NEXT: ret <2 x i1> [[NOTAND]] ; %notx = xor <2 x i1> %x, @@ -583,7 +583,7 @@ define i1 @not_logicalAnd_not_op0_use2(i1 %x, i1 %y) { define <2 x i1> @not_logicalOr_not_op0(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_logicalOr_not_op0( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[NOTOR:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> [[Y_NOT]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[NOTOR]] ; @@ -641,7 +641,7 @@ define i1 @not_logicalOr_not_op0_use2(i1 %x, i1 %y) { define <2 x i64> @bitcast_to_wide_elts_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_wide_elts_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOT]] @@ -654,7 +654,7 @@ define <2 x i64> @bitcast_to_wide_elts_sext_bool(<4 x i1> %b) { define <8 x i16> @bitcast_to_narrow_elts_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_narrow_elts_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[NOT]] @@ -680,7 +680,7 @@ define <2 x i16> @bitcast_to_vec_sext_bool(i1 %b) { define i128 @bitcast_to_scalar_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_scalar_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: ret i128 [[NOT]] @@ -698,7 +698,7 @@ define <2 x i4> @bitcast_to_vec_sext_bool_use1(i1 %b) { ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[B:%.*]] to i8 ; CHECK-NEXT: call void @use8(i8 [[SEXT]]) ; CHECK-NEXT: [[BC:%.*]] = bitcast i8 [[SEXT]] to <2 x i4> -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[BC]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[BC]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %sext = sext i1 %b to i8 diff --git a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll index 8a508af632962a2..787f8c7fd91701b 100644 --- a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll @@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll index 6ced39a88c0a6a2..329a47324f86230 100644 --- a/llvm/test/Transforms/InstCombine/nsw.ll +++ b/llvm/test/Transforms/InstCombine/nsw.ll @@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 3fd4a17d972af43..cd795aeeb212e30 100644 --- a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -72,7 +72,7 @@ define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) { define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p4_vector_urem_by_const__splat( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[T2]] ; @@ -111,7 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_poison0(<4 x i32> %x, <4 x i3 define <4 x i1> @p7_vector_urem_by_const__nonsplat_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_poison2( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], ; CHECK-NEXT: ret <4 x i1> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll index d8ef66a4dd78187..2e57597455c2cd8 100644 --- a/llvm/test/Transforms/InstCombine/onehot_merge.ll +++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll @@ -31,8 +31,8 @@ define i1 @and_consts_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @and_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @and_consts_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12) +; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 12) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %t1 = and <2 x i32> , %k @@ -84,8 +84,8 @@ define i1 @foo1_and_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -148,8 +148,8 @@ define i1 @foo1_and_commuted_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -196,8 +196,8 @@ define i1 @or_consts_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @or_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @or_consts_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12) +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 12) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %t1 = and <2 x i32> , %k @@ -249,8 +249,8 @@ define i1 @foo1_or_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] @@ -313,8 +313,8 @@ define i1 @foo1_or_commuted_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] @@ -372,8 +372,8 @@ define i1 @foo1_and_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_signbit_lshr_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -430,8 +430,8 @@ define i1 @foo1_or_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] diff --git a/llvm/test/Transforms/InstCombine/operand-complexity.ll b/llvm/test/Transforms/InstCombine/operand-complexity.ll index 541a15275b61700..491bf950bf435a2 100644 --- a/llvm/test/Transforms/InstCombine/operand-complexity.ll +++ b/llvm/test/Transforms/InstCombine/operand-complexity.ll @@ -60,7 +60,7 @@ define i8 @not(i8 %x) { define <2 x i8> @not_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_vec( ; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/or-concat.ll b/llvm/test/Transforms/InstCombine/or-concat.ll index dfc3f0631773a35..deeaea33af9f128 100644 --- a/llvm/test/Transforms/InstCombine/or-concat.ll +++ b/llvm/test/Transforms/InstCombine/or-concat.ll @@ -65,7 +65,7 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) { define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) { ; CHECK-LABEL: @concat_bswap32_unary_flip_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32)) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; @@ -103,7 +103,7 @@ define <2 x i64> @concat_bswap32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @concat_bswap32_binary_vector( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP4]]) ; CHECK-NEXT: ret <2 x i64> [[TMP5]] @@ -176,7 +176,7 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) { define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) { ; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32)) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; @@ -214,7 +214,7 @@ define <2 x i64> @concat_bitreverse32_binary_vector(<2 x i32> %a0, <2 x i32> %a1 ; CHECK-LABEL: @concat_bitreverse32_binary_vector( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP4]]) ; CHECK-NEXT: ret <2 x i64> [[TMP5]] diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll index f4ddbb5abc4639d..b05ff15b8b3c84e 100644 --- a/llvm/test/Transforms/InstCombine/or-xor.ll +++ b/llvm/test/Transforms/InstCombine/or-xor.ll @@ -81,7 +81,7 @@ define i32 @test5(i32 %x, i32 %y) { define <2 x i4> @test5_commuted(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @test5_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[Z]] ; %xor = xor <2 x i4> %x, %y @@ -437,7 +437,7 @@ define i9 @or_and_xor_not_constant_commute1(i9 %a, i9 %b) { define <2 x i9> @or_and_xor_not_constant_commute2_splat(<2 x i9> %a, <2 x i9> %b) { ; CHECK-LABEL: @or_and_xor_not_constant_commute2_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], splat (i9 42) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i9> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i9> [[XOR]] ; @@ -450,7 +450,7 @@ define <2 x i9> @or_and_xor_not_constant_commute2_splat(<2 x i9> %a, <2 x i9> %b define <2 x i9> @or_and_xor_not_constant_commute3_splat(<2 x i9> %a, <2 x i9> %b) { ; CHECK-LABEL: @or_and_xor_not_constant_commute3_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], splat (i9 42) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i9> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i9> [[XOR]] ; @@ -862,7 +862,7 @@ define i8 @or_not_xor_common_op_commute3(i8 %x, i8 %y, i8 %p) { define <2 x i4> @or_not_xor_common_op_commute4(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_not_xor_common_op_commute4( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Z:%.*]], [[NAND]] ; CHECK-NEXT: ret <2 x i4> [[O2]] ; @@ -975,7 +975,7 @@ define <2 x i4> @or_nand_xor_common_op_commute1(<2 x i4> %x, <2 x i4> %y, <2 x i ; CHECK-LABEL: @or_nand_xor_common_op_commute1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Z:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[AND]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %and = and <2 x i4> %z, %x diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 9bcad034b363e71..4a886afd78a5f0c 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -135,8 +135,8 @@ define i1 @test18_logical(i32 %A) { define <2 x i1> @test18vec(<2 x i32> %A) { ; CHECK-LABEL: @test18vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -100) +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -50) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sge <2 x i32> %A, @@ -196,8 +196,8 @@ define i16 @test23(i16 %A) { define <2 x i16> @test23vec(<2 x i16> %A) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = xor <2 x i16> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 1) +; CHECK-NEXT: [[D:%.*]] = xor <2 x i16> [[B]], splat (i16 -24575) ; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = lshr <2 x i16> %A, @@ -400,8 +400,8 @@ define i32 @test30(i32 %A) { define <2 x i32> @test30vec(<2 x i32> %A) { ; CHECK-LABEL: @test30vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -58312) +; CHECK-NEXT: [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], splat (i32 32962) ; CHECK-NEXT: ret <2 x i32> [[E]] ; %B = or <2 x i32> %A, @@ -430,8 +430,8 @@ define i64 @test31(i64 %A) { define <2 x i64> @test31vec(<2 x i64> %A) { ; CHECK-LABEL: @test31vec( -; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], +; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 4294908984) +; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], splat (i64 32962) ; CHECK-NEXT: ret <2 x i64> [[F]] ; %B = or <2 x i64> %A, @@ -555,8 +555,8 @@ define i1 @test37_logical(i32 %x) { define <2 x i1> @test37_uniform(<2 x i32> %x) { ; CHECK-LABEL: @test37_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[RET1]] ; %add1 = add <2 x i32> %x, @@ -861,9 +861,9 @@ define i1 @test46_logical(i8 signext %c) { define <2 x i1> @test46_uniform(<2 x i8> %c) { ; CHECK-LABEL: @test46_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], -; CHECK-NEXT: [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], splat (i8 -33) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -65) +; CHECK-NEXT: [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], splat (i8 26) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %c.off = add <2 x i8> %c, @@ -940,9 +940,9 @@ define i1 @test47_logical(i8 signext %c) { define <2 x i1> @test47_nonuniform(<2 x i8> %c) { ; CHECK-LABEL: @test47_nonuniform( ; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], splat (i8 27) ; CHECK-NEXT: [[C_OFF17:%.*]] = add <2 x i8> [[C]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], splat (i8 27) ; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -966,7 +966,7 @@ define i32 @test49(i1 %C) { define <2 x i32> @test49vec(i1 %C) { ; CHECK-LABEL: @test49vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1019), <2 x i32> splat (i32 123) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1013,7 +1013,7 @@ define <2 x i32> @test50vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1019), [[ENTRY:%.*]] ], [ splat (i32 123), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1404,7 +1404,7 @@ define i32 @test3(i32 %x, i32 %y) { define <2 x i32> @test4_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test4_vec( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[OR1]] ; %or = or <2 x i32> %y, %x @@ -1479,7 +1479,7 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) { define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], -; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[Y]], [[SIGN]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1538,7 +1538,7 @@ define i1 @cmp_overlap(i32 %x) { define <2 x i1> @cmp_overlap_splat(<2 x i5> %x) { ; CHECK-LABEL: @cmp_overlap_splat( -; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i5> [[X:%.*]], splat (i5 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %isneg = icmp slt <2 x i5> %x, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll index 6b5a65c03ee102b..1d18d9ffd46d26e 100644 --- a/llvm/test/Transforms/InstCombine/overflow-mul.ll +++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll @@ -216,7 +216,7 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: [[VMOVL_I_I726:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[VMOVL_I_I712:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32> ; CHECK-NEXT: [[MUL_I703:%.*]] = mul nuw <4 x i32> [[VMOVL_I_I712]], [[VMOVL_I_I726]] -; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], +; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], splat (i32 -1) ; CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[VCGEZ_I]] ; diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll index 478e6bd5878d5ad..ab2584add4da7e1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index 469375633b60e1c..3db87b00c1a901c 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 1a711e58c333bee..e16aac9b912b25d 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -49,14 +49,14 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index cd0098ecdb0a6a3..ebfa456cf0dd834 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -53,16 +53,16 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -85,7 +85,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -116,7 +116,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll index 9a4a5dd890eec26..5dda49b280e1917 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -50,13 +50,13 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index a9fafdbf7b8db70..4a0752f3d0e0ee1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index bce2a1c3f7e5057..a6c3f3341726548 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index 55d0b3f80a519b2..cc2a33e43cbf1bb 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -38,12 +38,12 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = lshr <8 x i32> , %nbits @@ -76,7 +76,7 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 7ad99a6bb0a38f3..eff9bcb2ddb783a 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -42,14 +42,14 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits @@ -66,7 +66,7 @@ define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) @@ -88,8 +88,8 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 7be65330013129d..66a46aeb908b546 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -39,11 +39,11 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <8 x i32> [[X:%.*]], [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits diff --git a/llvm/test/Transforms/InstCombine/pow-0.ll b/llvm/test/Transforms/InstCombine/pow-0.ll index 01c03df8c6b5451..26a54c6850e2abd 100644 --- a/llvm/test/Transforms/InstCombine/pow-0.ll +++ b/llvm/test/Transforms/InstCombine/pow-0.ll @@ -40,7 +40,7 @@ define double @fast_minus_zero(double %value) { define <2 x double> @vec_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -49,7 +49,7 @@ define <2 x double> @vec_zero(<2 x double> %value) { define <2 x double> @vec_minus_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_minus_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -58,7 +58,7 @@ define <2 x double> @vec_minus_zero(<2 x double> %value) { define <2 x double> @vec_fast_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_fast_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -67,7 +67,7 @@ define <2 x double> @vec_fast_zero(<2 x double> %value) { define <2 x double> @vec_fast_minus_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_fast_minus_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll index 91e909aa25c7e32..c23d15d16a34bad 100644 --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -97,7 +97,7 @@ define float @test_simplify1_noerrno(float %x) { define <2 x float> @test_simplify1v(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_simplify1v( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %retval @@ -134,7 +134,7 @@ define double @test_simplify2_noerrno(double %x) { define <2 x double> @test_simplify2v(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_simplify2v( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %retval @@ -253,22 +253,22 @@ define <2 x float> @test_simplify3v(<2 x float> %x) { ; ; VC32-LABEL: define <2 x float> @test_simplify3v( ; VC32-SAME: <2 x float> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC32-NEXT: ret <2 x float> [[RETVAL]] ; ; VC19-LABEL: define <2 x float> @test_simplify3v( ; VC19-SAME: <2 x float> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC19-NEXT: ret <2 x float> [[RETVAL]] ; ; VC64-LABEL: define <2 x float> @test_simplify3v( ; VC64-SAME: <2 x float> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC64-NEXT: ret <2 x float> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x float> @test_simplify3v( ; NOLIB-SAME: <2 x float> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; NOLIB-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -279,28 +279,28 @@ define <2 x float> @test_simplify3v(<2 x float> %x) { define <2 x double> @test_simplify3vn(<2 x double> %x) { ; ANY-LABEL: define <2 x double> @test_simplify3vn( ; ANY-SAME: <2 x double> [[X:%.*]]) { -; ANY-NEXT: [[MUL:%.*]] = fmul <2 x double> [[X]], +; ANY-NEXT: [[MUL:%.*]] = fmul <2 x double> [[X]], splat (double 2.000000e+00) ; ANY-NEXT: [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; ANY-NEXT: ret <2 x double> [[EXP2]] ; ; VC32-LABEL: define <2 x double> @test_simplify3vn( ; VC32-SAME: <2 x double> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC32-NEXT: ret <2 x double> [[RETVAL]] ; ; VC19-LABEL: define <2 x double> @test_simplify3vn( ; VC19-SAME: <2 x double> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC19-NEXT: ret <2 x double> [[RETVAL]] ; ; VC64-LABEL: define <2 x double> @test_simplify3vn( ; VC64-SAME: <2 x double> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC64-NEXT: ret <2 x double> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x double> @test_simplify3vn( ; NOLIB-SAME: <2 x double> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; NOLIB-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -450,22 +450,22 @@ define <2 x double> @test_simplify4v(<2 x double> %x) { ; ; VC32-LABEL: define <2 x double> @test_simplify4v( ; VC32-SAME: <2 x double> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC32-NEXT: ret <2 x double> [[RETVAL]] ; ; VC19-LABEL: define <2 x double> @test_simplify4v( ; VC19-SAME: <2 x double> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC19-NEXT: ret <2 x double> [[RETVAL]] ; ; VC64-LABEL: define <2 x double> @test_simplify4v( ; VC64-SAME: <2 x double> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC64-NEXT: ret <2 x double> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x double> @test_simplify4v( ; NOLIB-SAME: <2 x double> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; NOLIB-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -482,22 +482,22 @@ define <2 x float> @test_simplify4vn(<2 x float> %x) { ; ; VC32-LABEL: define <2 x float> @test_simplify4vn( ; VC32-SAME: <2 x float> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC32-NEXT: ret <2 x float> [[RETVAL]] ; ; VC19-LABEL: define <2 x float> @test_simplify4vn( ; VC19-SAME: <2 x float> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC19-NEXT: ret <2 x float> [[RETVAL]] ; ; VC64-LABEL: define <2 x float> @test_simplify4vn( ; VC64-SAME: <2 x float> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC64-NEXT: ret <2 x float> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x float> @test_simplify4vn( ; NOLIB-SAME: <2 x float> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; NOLIB-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -573,7 +573,7 @@ define float @test_simplify5_noerrno(float %x) { define <2 x float> @test_simplify5v(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_simplify5v( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval @@ -610,7 +610,7 @@ define double @test_simplify6_noerrno(double %x) { define <2 x double> @test_simplify6v(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_simplify6v( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval @@ -1317,7 +1317,7 @@ define float @pow_neg1_strict_noerrno(float %x) { define <2 x float> @pow_neg1_strictv(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @pow_neg1_strictv( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[RECIPROCAL]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) @@ -1357,7 +1357,7 @@ define double @pow_neg1_double_fast_noerrno(double %x) { define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @pow_neg1_double_fastv( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[X]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -1533,7 +1533,7 @@ define bfloat @test_pow_10_bf16(bfloat %x) { define <2 x half> @test_pow_10_v2f16(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_pow_10_v2f16( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4900), <2 x half> [[X]]) ; CHECK-NEXT: ret <2 x half> [[RETVAL]] ; %retval = call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> %x) @@ -1543,7 +1543,7 @@ define <2 x half> @test_pow_10_v2f16(<2 x half> %x) { define <2 x float> @test_pow_10_v2f32(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_10_v2f32( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 1.000000e+01), <2 x float> [[X]]) ; CHECK-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -1553,7 +1553,7 @@ define <2 x float> @test_pow_10_v2f32(<2 x float> %x) { define <2 x double> @test_pow_10_v2f64(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_pow_10_v2f64( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 1.000000e+01), <2 x double> [[X]]) ; CHECK-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -1563,7 +1563,7 @@ define <2 x double> @test_pow_10_v2f64(<2 x double> %x) { define <2 x bfloat> @test_pow_10_v2bf16(<2 x bfloat> %x) { ; CHECK-LABEL: define <2 x bfloat> @test_pow_10_v2bf16( ; CHECK-SAME: <2 x bfloat> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> , <2 x bfloat> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> splat (bfloat 0xR4120), <2 x bfloat> [[X]]) ; CHECK-NEXT: ret <2 x bfloat> [[RETVAL]] ; %retval = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> , <2 x bfloat> %x) diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll index 9272c1c33a953ca..7eef670ccea3dcc 100644 --- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -51,8 +51,8 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_half_approx( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], -; CHECK-NEXT: [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> , <2 x double> [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> splat (double 0x7FF0000000000000), <2 x double> [[ABS]] ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -206,7 +206,7 @@ define float @pow_libcall_neghalf_afn(float %x) { define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF( -; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double -5.000000e-01)) ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -221,8 +221,8 @@ define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_reassoc( ; CHECK-NEXT: [[SQRT:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call reassoc <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc <2 x double> , [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; @@ -238,8 +238,8 @@ define <2 x double> @pow_intrinsic_neghalf_afn(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_afn( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = fdiv afn <2 x double> , [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv afn <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; @@ -264,7 +264,7 @@ define <2 x double> @pow_intrinsic_neghalf_ninf(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_ninf( ; CHECK-NEXT: [[SQRT:%.*]] = call ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> , [[ABS]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %pow = call afn ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll index d31b7c9fe283594..0c4418a3094f96f 100644 --- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll @@ -127,7 +127,7 @@ define half @pow_sitofp_f16_const_base_2(i32 %x) { define <2 x float> @pow_sitofp_v2f32_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> @@ -139,20 +139,20 @@ define <2 x float> @pow_sitofp_v2f32_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x float> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 8.000000e+00), <2 x float> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x float> [[POW]] ; ; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x float> [[EXP2]] ; @@ -176,7 +176,7 @@ define <2 x float> @pow_sitofp_v2f32_const_base_mixed_2(<2 x i32> %x) { define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> @@ -198,7 +198,7 @@ define @pow_sitofp_nxv4f32_const_base_2( define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> splat (half 0xH3C00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x half> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x half> @@ -209,7 +209,7 @@ define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { define <2 x double> @pow_sitofp_v2f64_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> splat (double 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x double> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x double> @@ -228,20 +228,20 @@ define <2 x half> @pow_sitofp_v2f16_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x half> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4800), <2 x half> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x half> [[POW]] ; ; NOLDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x half> [[EXP2]] ; @@ -261,20 +261,20 @@ define <2 x double> @pow_sitofp_v2f64_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x double> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 8.000000e+00), <2 x double> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x double> [[POW]] ; ; NOLDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x double> [[EXP2]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll index e5546075edc9bbb..4a75a84d7b66de9 100644 --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -531,7 +531,7 @@ define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> -; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %s = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/pr14365.ll b/llvm/test/Transforms/InstCombine/pr14365.ll index 3a09b55aba309d9..1a51c174733778c 100644 --- a/llvm/test/Transforms/InstCombine/pr14365.ll +++ b/llvm/test/Transforms/InstCombine/pr14365.ll @@ -16,7 +16,7 @@ define i32 @test0(i32 %a0) { define <4 x i32> @test0_vec(<4 x i32> %a0) { ; CHECK-LABEL: @test0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 -1431655766) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = and <4 x i32> %a0, @@ -44,8 +44,8 @@ define i32 @test1(i32 %a0) { define <4 x i32> @test1_vec(<4 x i32> %a0) { ; CHECK-LABEL: @test1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 1431655765) ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i32> [[A0]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/pr17827.ll b/llvm/test/Transforms/InstCombine/pr17827.ll index 6c6110aa073a59f..2f10bb5c7f25f0d 100644 --- a/llvm/test/Transforms/InstCombine/pr17827.ll +++ b/llvm/test/Transforms/InstCombine/pr17827.ll @@ -49,9 +49,9 @@ define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) { define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec( -; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[ANDP]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], splat (i8 5) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[ANDP]], splat (i8 -64) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 32) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %andp = and <2 x i8> %p, @@ -78,7 +78,7 @@ define i1 @test_shift_and_cmp_changed2(i8 %p) { define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) { ; CHECK-LABEL: @test_shift_and_cmp_changed2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 6) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll b/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll index 92f55b211b63900..4cafc4b1d475c20 100644 --- a/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll @@ -9,7 +9,7 @@ target datalayout = "p:16:16" define <4 x i1> @PR38984_1() { ; CHECK-LABEL: @PR38984_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %0 = load i16, ptr @offsets, align 1 diff --git a/llvm/test/Transforms/InstCombine/pr38984.ll b/llvm/test/Transforms/InstCombine/pr38984.ll index a7eddcfbe0845d7..9dfc2257dfe83b9 100644 --- a/llvm/test/Transforms/InstCombine/pr38984.ll +++ b/llvm/test/Transforms/InstCombine/pr38984.ll @@ -9,7 +9,7 @@ target datalayout = "p:16:16" define <4 x i1> @PR38984_1() { ; CHECK-LABEL: @PR38984_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %0 = load i16, ptr @offsets, align 1 diff --git a/llvm/test/Transforms/InstCombine/pr53357.ll b/llvm/test/Transforms/InstCombine/pr53357.ll index 0ae690869c1c443..2292abd0c906884 100644 --- a/llvm/test/Transforms/InstCombine/pr53357.ll +++ b/llvm/test/Transforms/InstCombine/pr53357.ll @@ -20,7 +20,7 @@ define i32 @src(i32 noundef %0, i32 noundef %1) { define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) { ; CHECK-LABEL: @src_vec( ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %3 = and <2 x i32> %1, %0 @@ -34,7 +34,7 @@ define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) { define <2 x i32> @src_vec_poison(<2 x i32> noundef %0, <2 x i32> noundef %1) { ; CHECK-LABEL: @src_vec_poison( ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %3 = and <2 x i32> %1, %0 diff --git a/llvm/test/Transforms/InstCombine/pr98435.ll b/llvm/test/Transforms/InstCombine/pr98435.ll index b400801d342fe7d..78c8e860bed72bb 100644 --- a/llvm/test/Transforms/InstCombine/pr98435.ll +++ b/llvm/test/Transforms/InstCombine/pr98435.ll @@ -4,7 +4,7 @@ define <2 x i1> @pr98435(<2 x i1> %val) { ; CHECK-LABEL: define <2 x i1> @pr98435( ; CHECK-SAME: <2 x i1> [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL1:%.*]] = select <2 x i1> , <2 x i1> , <2 x i1> [[VAL]] +; CHECK-NEXT: [[VAL1:%.*]] = select <2 x i1> , <2 x i1> splat (i1 true), <2 x i1> [[VAL]] ; CHECK-NEXT: ret <2 x i1> [[VAL1]] ; %val1 = select <2 x i1> , <2 x i1> , <2 x i1> %val diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index 24777b1b7f2085a..bcbd78e23ed67fd 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -23,7 +23,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) { define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0, <2 x i64> %m1) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs ; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M1]], splat (i64 12345) ; CHECK-NEXT: [[R:%.*]] = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; @@ -114,7 +114,7 @@ define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 3 define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -96)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -435,7 +435,7 @@ define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p) ; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented ; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17 -; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> ) +; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> splat (i64 -96)) ; CHECK-NEXT: ret <2 x ptr> [[PM]] ; %gep = getelementptr i8, <2 x ptr> %p, i32 17 @@ -520,7 +520,7 @@ define ptr @ptrmask_is_useless4(i64 %i, i64 %m) { define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec ; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) { -; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], +; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], splat (i64 31) ; CHECK-NEXT: [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr> ; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]]) ; CHECK-NEXT: ret <2 x ptr> [[R]] diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll index e3bcfde4ad59ca0..f505a0e99ec5016 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -127,9 +127,9 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll index 8c61e24a97f1d04..c25b873b14a2ee6 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[X:%.*]], [[T2]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -128,8 +128,8 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) ; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] ; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[X:%.*]], [[T3]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll index a78246781c7f9df..c82c4428c31fbec 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -51,8 +51,8 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll index b79ab7909752707..325612f0a6d08a0 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -55,9 +55,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -89,7 +89,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -122,7 +122,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll index 097c5663c129c78..699e11dcf0358da 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll index d8af0ca2199763a..6163aa8c7ac0cfe 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll @@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = ashr exact <8 x i64> [[T1]], [[T0]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll index a7fd2717e7197cb..53ccc50d968a8c0 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll @@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]] -; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]] ; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll index e3c098138911635..98256ffbe53fb32 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll @@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]] ; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll index 8428ef67d6b86b4..f12e3e460860fbd 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll @@ -59,9 +59,9 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -80,7 +80,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]] ; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll index 5d8ff9e9fb71bd5..731de2b94cf2697 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll @@ -67,10 +67,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] -; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -92,8 +92,8 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) @@ -118,7 +118,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_poison(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_poison( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll index 74864e4473b1147..1022a15fa04f569 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll @@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll index 68626d5a2eabbe0..4c03775ce753472 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll @@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = ashr exact <3 x i32> [[T0]], [[NBITS]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll index 45880ac5efe51e7..ca65e07e2d853a0 100644 --- a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll +++ b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll @@ -39,7 +39,7 @@ define i32 @t1_sshr(i32 %data, i32 %nbits) { define <4 x i32> @t2_vec(<4 x i32> %data, <4 x i32> %nbits) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <4 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <4 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <4 x i32> [[T0]], [[DATA:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[T1]], [[NBITS]] ; CHECK-NEXT: ret <4 x i32> [[T2]] diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll index 45db2cf67585241..e7d6cc7102c7136 100644 --- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll +++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll @@ -98,7 +98,7 @@ define i8 @urem_XY_XZ_with_CY_lt_CZ_with_shl(i8 %X) { define <2 x i8> @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out(<2 x i8> %X) { ; CHECK-LABEL: @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nsw <2 x i8> %X, @@ -284,7 +284,7 @@ define i8 @srem_XY_XZ_with_CY_rem_CZ_eq_0_fail_missing_flag(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ( -; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl <2 x i8> %X, @@ -306,7 +306,7 @@ define i8 @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> splat (i8 3), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nuw <2 x i8> , %X @@ -363,7 +363,7 @@ define i8 @srem_XY_XZ_with_CY_gt_CZ_with_nuw_out(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out( -; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = mul nsw <2 x i8> %X, diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 4f7687aeaf8bc8d..c1d8bb2b02b80e9 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -79,7 +79,7 @@ define i8 @urem_with_sext_bool_divisor(i1 %x, i8 %y) { define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) { ; CHECK-LABEL: @urem_with_sext_bool_divisor_vec( ; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze <2 x i8> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], splat (i8 -1) ; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> zeroinitializer, <2 x i8> [[Y_FROZEN]] ; CHECK-NEXT: ret <2 x i8> [[REM]] ; @@ -91,8 +91,8 @@ define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) { define <2 x i4> @big_divisor_vec(<2 x i4> %x) { ; CHECK-LABEL: @big_divisor_vec( ; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i4> [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], splat (i4 -3) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], splat (i4 3) ; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X_FR]], <2 x i4> [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[REM]] ; @@ -198,7 +198,7 @@ define i32 @test3(i32 %A) { define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) { ; CHECK-LABEL: @vec_power_of_2_constant_splat_divisor( -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = urem <2 x i32> %A, @@ -207,7 +207,7 @@ define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) { define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) { ; CHECK-LABEL: @weird_vec_power_of_2_constant_splat_divisor( -; CHECK-NEXT: [[B:%.*]] = and <2 x i19> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i19> [[A:%.*]], splat (i19 7) ; CHECK-NEXT: ret <2 x i19> [[B]] ; %B = urem <2 x i19> %A, @@ -227,7 +227,7 @@ define i1 @test3a(i32 %A) { define <2 x i1> @test3a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test3a_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -669,8 +669,8 @@ define i32 @test22(i32 %A) { define <2 x i32> @test23(<2 x i32> %A) { ; CHECK-LABEL: @test23( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) +; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %and = and <2 x i32> %A, @@ -691,7 +691,7 @@ define i1 @test24(i32 %A) { define <2 x i1> @test24_vec(<2 x i32> %A) { ; CHECK-LABEL: @test24_vec( -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[B]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -713,7 +713,7 @@ define i1 @test25(i32 %A) { define <2 x i1> @test25_vec(<2 x i32> %A) { ; CHECK-LABEL: @test25_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1034,7 +1034,7 @@ define i32 @urem_select_of_constants_divisor(i1 %b, i32 %x) { define <2 x i32> @PR62401(<2 x i1> %x, <2 x i32> %y) { ; CHECK-LABEL: @PR62401( ; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze <2 x i32> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> zeroinitializer, <2 x i32> [[Y_FROZEN]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll index 301ead708a08f75..d12cebe292e3915 100644 --- a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll +++ b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll @@ -63,8 +63,8 @@ define i32 @p3_sgt(i32 %x, i32 %y) { define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p4_vec_splat_ult_65536( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ult <2 x i32> %x, @@ -73,8 +73,8 @@ define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p5_vec_splat_ugt( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ugt <2 x i32> %x, @@ -83,8 +83,8 @@ define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p6_vec_splat_slt_65536( -; CHECK-NEXT: [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp slt <2 x i32> %x, @@ -93,8 +93,8 @@ define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p7_vec_splat_sgt( -; CHECK-NEXT: [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp sgt <2 x i32> %x, @@ -106,8 +106,8 @@ define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p8_vec_nonsplat_poison0( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ult <2 x i32> %x, @@ -116,7 +116,7 @@ define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p9_vec_nonsplat_poison1( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -126,7 +126,7 @@ define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p10_vec_nonsplat_poison2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p10_vec_nonsplat_poison2( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -349,9 +349,9 @@ define i32 @ult_inf_loop(i32 %x) { define <2 x i32> @ult_inf_loop_vec(<2 x i32> %x) { ; CHECK-LABEL: @ult_inf_loop_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 38) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -4) +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 -5), <2 x i32> splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %add = add <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index 35d15ea0bea238c..ea7c471594da0a9 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -56,7 +56,7 @@ define i88 @rotl_i88_constant_commute(i88 %x) { define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -67,7 +67,7 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) { define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -78,7 +78,7 @@ define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) { define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -91,7 +91,7 @@ define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) { define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -102,7 +102,7 @@ define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) { define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -113,7 +113,7 @@ define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) { define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -238,7 +238,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) { define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @rotl_v4i32( -; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]] ; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]] @@ -255,7 +255,7 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) { define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) { ; CHECK-LABEL: @rotr_v3i42( -; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> , [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]] ; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]] diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll index e4dd2d10637d3ee..68a621946374e89 100644 --- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll @@ -51,7 +51,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, @@ -62,7 +62,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, @@ -73,7 +73,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index 1cce297122f8a18..d27e7aa28d62c9c 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -405,8 +405,8 @@ define <4 x i32> @sadd_satv4i4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sadd_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16)) ; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] ; entry: @@ -422,8 +422,8 @@ define <4 x i32> @ssub_satv4i4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @ssub_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16)) ; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] ; entry: @@ -785,8 +785,8 @@ define <2 x i8> @ashrv2i8(<2 x i16> %a, <2 x i8> %b) { ; CHECK-NEXT: [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], ; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16> ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> splat (i16 -128)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> splat (i16 127)) ; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[CONV7]] ; @@ -805,7 +805,7 @@ entry: define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) { ; CHECK-LABEL: @ashrv2i8_s( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8) ; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <2 x i16> [[TMP0]] to <2 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index af8a9314a08049b..9236d96f59a55b7 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -61,7 +61,7 @@ define i8 @test_scalar_uadd_combine(i8 %a) { define <2 x i8> @test_vector_uadd_combine(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_combine( -; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[X2]] ; %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -93,7 +93,7 @@ define i8 @test_scalar_uadd_overflow(i8 %a) { define <2 x i8> @test_vector_uadd_overflow(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_overflow( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) %y2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %y1, <2 x i8> ) @@ -113,7 +113,7 @@ define i8 @test_scalar_sadd_both_positive(i8 %a) { define <2 x i8> @test_vector_sadd_both_positive(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_both_positive( -; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[Z2]] ; %z1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -133,7 +133,7 @@ define i8 @test_scalar_sadd_both_negative(i8 %a) { define <2 x i8> @test_vector_sadd_both_negative(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_both_negative( -; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[U2]] ; %u1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -177,7 +177,7 @@ define i8 @test_scalar_uadd_neg_neg(i8 %a) { define <2 x i8> @test_vector_uadd_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_neg_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %a_neg = or <2 x i8> %a, %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> ) @@ -198,7 +198,7 @@ define i8 @test_scalar_uadd_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_uadd_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -221,7 +221,7 @@ define i8 @test_scalar_uadd_neg_nneg(i8 %a) { define <2 x i8> @test_vector_uadd_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -243,8 +243,8 @@ define i8 @test_scalar_uadd_never_overflows(i8 %a) { define <2 x i8> @test_vector_uadd_never_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_never_overflows( -; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], +; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a_masked = and <2 x i8> %a, @@ -263,7 +263,7 @@ define i8 @test_scalar_uadd_always_overflows(i8 %a) { define <2 x i8> @test_vector_uadd_always_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_always_overflows( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %a_masked = or <2 x i8> %a, %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> ) @@ -284,7 +284,7 @@ define i8 @test_scalar_sadd_neg_nneg(i8 %a) { define <2 x i8> @test_vector_sadd_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -307,7 +307,7 @@ define i8 @test_scalar_sadd_nneg_neg(i8 %a) { define <2 x i8> @test_vector_sadd_nneg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_nneg_neg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -330,7 +330,7 @@ define i8 @test_scalar_sadd_neg_neg(i8 %a) { define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_neg_neg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -474,7 +474,7 @@ define i8 @test_scalar_ssub_canonical(i8 %a) { define <2 x i8> @test_vector_ssub_canonical(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_canonical( -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -10)) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -550,7 +550,7 @@ define i8 @test_simplify_decrement_ne(i8 %a) { define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) { ; CHECK-LABEL: @test_simplify_decrement_vec( -; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[I2]] ; %i = icmp eq <2 x i8> %a, @@ -561,7 +561,7 @@ define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) { define <2 x i8> @test_simplify_decrement_vec_poison(<2 x i8> %a) { ; CHECK-LABEL: @test_simplify_decrement_vec_poison( -; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[I2]] ; %i = icmp eq <2 x i8> %a, @@ -636,7 +636,7 @@ define i8 @test_invalid_simplify_other(i8 %a, i8 %b) { define <2 x i8> @test_vector_usub_combine(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_combine( -; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[X2]] ; %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -688,7 +688,7 @@ define i8 @test_scalar_ssub_both_positive(i8 %a) { define <2 x i8> @test_vector_ssub_both_positive(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_both_positive( -; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[Z2]] ; %z1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -708,7 +708,7 @@ define i8 @test_scalar_ssub_both_negative(i8 %a) { define <2 x i8> @test_vector_ssub_both_negative(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_both_negative( -; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[U2]] ; %u1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -741,7 +741,7 @@ define i8 @test_scalar_sadd_ssub(i8 %a) { define <2 x i8> @test_vector_sadd_ssub(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_ssub( -; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[V2]] ; %v1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> %a) @@ -794,7 +794,7 @@ define i8 @test_scalar_usub_neg_nneg(i8 %a) { define <2 x i8> @test_vector_usub_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -817,7 +817,7 @@ define i8 @test_scalar_usub_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_usub_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -839,8 +839,8 @@ define i8 @test_scalar_usub_never_overflows(i8 %a) { define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_never_overflows( -; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], +; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 64) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a_masked = or <2 x i8> %a, @@ -880,7 +880,7 @@ define i8 @test_scalar_ssub_neg_neg(i8 %a) { define <2 x i8> @test_vector_ssub_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_neg_neg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -903,7 +903,7 @@ define i8 @test_scalar_ssub_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_ssub_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -926,7 +926,7 @@ define i8 @test_scalar_ssub_neg_nneg(i8 %a) { define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1044,7 +1044,7 @@ define i8 @test_scalar_usub_add_nuw_inferred(i8 %a) { define <2 x i8> @test_vector_usub_add_nuw_no_ov(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov( -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, @@ -1076,7 +1076,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) { define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2( ; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, @@ -1124,8 +1124,8 @@ define i8 @test_scalar_ssub_add_nsw_may_ov(i8 %a, i8 %b) { define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_splat( -; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7) +; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1137,7 +1137,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat1( -; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7) ; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1151,7 +1151,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2( ; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]]) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1459,7 +1459,7 @@ define i32 @uadd_sat_negative_one(i32 %x) { define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_vector( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1470,7 +1470,7 @@ define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1481,7 +1481,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1492,7 +1492,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector_compare2(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %cmp = icmp ult <2 x i8> %x, %add = add <2 x i8> %x, @@ -1878,7 +1878,7 @@ define i32 @uadd_sat_not_uge(i32 %x, i32 %y) { define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) { ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add( ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -1895,7 +1895,7 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add_partial_poison(<2 x i32> %x, <2 x ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[YP:%.*]], [[NOTX]] ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[YP]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> , <2 x i32> [[A]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> splat (i32 -1), <2 x i32> [[A]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %notx = xor <2 x i32> %x, @@ -1950,7 +1950,7 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp) ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt( ; CHECK-NEXT: [[X:%.*]] = urem <2 x i32> , [[XP:%.*]] ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> , [[YP:%.*]] -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -2057,7 +2057,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) { define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2068,7 +2068,7 @@ define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2079,7 +2079,7 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2198,7 +2198,7 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) { define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) { ; CHECK-LABEL: @unsigned_sat_constant_using_min_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -15)) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %c = icmp ult <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll index 591437b72c1fc41..a6931b4c41d2d76 100644 --- a/llvm/test/Transforms/InstCombine/scalarization.ll +++ b/llvm/test/Transforms/InstCombine/scalarization.ll @@ -161,7 +161,7 @@ define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) { define i8 @extract_element_binop_splat_variable_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) { ; ; CHECK-LABEL: @extract_element_binop_splat_variable_index_may_trap( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i8> splat (i8 42), [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll index 3184395ccb12b88..e3e4a0c4a8f9cbd 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll @@ -107,8 +107,8 @@ define i32 @sdiv_abs_nsw(i32 %x) { define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) { ; CHECK-LABEL: @sdiv_abs_nsw_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 1), <4 x i32> splat (i32 -1) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll index 3451c5e626f308b..a3133ef82d5525d 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll @@ -27,7 +27,7 @@ define i8 @n1(i8 %x) { define <2 x i8> @t2_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV_NEG]] ; CHECK-NEXT: ret <2 x i8> [[DIV]] ; @@ -100,8 +100,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 4) ; CHECK-NEXT: [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]] ; CHECK-NEXT: ret <2 x i8> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll index 36af8685f9ae399..f463f27acc882f4 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll @@ -37,7 +37,7 @@ define i8 @n2(i8 %x) { define <2 x i8> @t3_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[DIV]] ; %div = sdiv exact <2 x i8> %x, @@ -144,8 +144,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i8> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %a = shl <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 68bd28cf234b47e..c3fbb3fe15e19ea 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -539,7 +539,7 @@ define i1 @and_or2_multiuse(i1 %a, i1 %b, i1 %c) { define <2 x i1> @and_or1_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or1_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -553,7 +553,7 @@ define <2 x i1> @and_or1_vec(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or2_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or2_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -567,7 +567,7 @@ define <2 x i1> @and_or2_vec(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or1_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -581,7 +581,7 @@ define <2 x i1> @and_or1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or2_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or2_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -673,7 +673,7 @@ define i1 @and_or3_multiuse(i1 %a, i1 %b, i32 %x, i32 %y) { define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @and_or3_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -686,7 +686,7 @@ define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> % define <2 x i1> @and_or3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @and_or3_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -791,7 +791,7 @@ define <2 x i1> @or_and1_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and1_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[A:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -805,7 +805,7 @@ define <2 x i1> @or_and2_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and2_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -819,7 +819,7 @@ define <2 x i1> @or_and1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and1_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[A:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -833,7 +833,7 @@ define <2 x i1> @or_and2_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and2_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -938,7 +938,7 @@ define <2 x i1> @or_and3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> % ; CHECK-LABEL: @or_and3_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = icmp eq <2 x i32> %x, %y @@ -951,7 +951,7 @@ define <2 x i1> @or_and3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 ; CHECK-LABEL: @or_and3_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = icmp eq <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/select-bitext.ll b/llvm/test/Transforms/InstCombine/select-bitext.ll index 4c09448aa833910..553d5ae155153fc 100644 --- a/llvm/test/Transforms/InstCombine/select-bitext.ll +++ b/llvm/test/Transforms/InstCombine/select-bitext.ll @@ -51,7 +51,7 @@ define i64 @sel_sext(i32 %a, i1 %cmp) { define <4 x i64> @sel_sext_vec(<4 x i32> %a, <4 x i1> %cmp) { ; CHECK-LABEL: @sel_sext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42) ; CHECK-NEXT: ret <4 x i64> [[EXT]] ; %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> @@ -73,7 +73,7 @@ define i64 @sel_zext(i32 %a, i1 %cmp) { define <4 x i64> @sel_zext_vec(<4 x i32> %a, <4 x i1> %cmp) { ; CHECK-LABEL: @sel_zext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42) ; CHECK-NEXT: ret <4 x i64> [[EXT]] ; %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> @@ -152,8 +152,8 @@ define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) { define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) +; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -178,7 +178,7 @@ define i64 @trunc_sel_larger_zext(i32 %a, i1 %cmp) { define <2 x i64> @trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_zext_vec( -; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535) ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg <2 x i32> [[TRUNC_MASK]] to <2 x i64> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] @@ -205,7 +205,7 @@ define i32 @trunc_sel_smaller_zext(i64 %a, i1 %cmp) { define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_zext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65535) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -229,7 +229,7 @@ define i32 @trunc_sel_equal_zext(i32 %a, i1 %cmp) { define <2 x i32> @trunc_sel_equal_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -578,7 +578,7 @@ define i32 @sext_true_val_must_be_all_ones(i1 %x) { define <2 x i32> @sext_true_val_must_be_all_ones_vec(<2 x i1> %x) { ; CHECK-LABEL: @sext_true_val_must_be_all_ones_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> , !prof [[PROF0]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 -1), <2 x i32> , !prof [[PROF0]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %ext = sext <2 x i1> %x to <2 x i32> @@ -598,7 +598,7 @@ define i32 @zext_true_val_must_be_one(i1 %x) { define <2 x i32> @zext_true_val_must_be_one_vec(<2 x i1> %x) { ; CHECK-LABEL: @zext_true_val_must_be_one_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> , !prof [[PROF0]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 1), <2 x i32> , !prof [[PROF0]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %ext = zext <2 x i1> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/select-divrem.ll b/llvm/test/Transforms/InstCombine/select-divrem.ll index e11afd7b543b20b..a674f9c64b2001a 100644 --- a/llvm/test/Transforms/InstCombine/select-divrem.ll +++ b/llvm/test/Transforms/InstCombine/select-divrem.ll @@ -285,7 +285,7 @@ define i32 @rem_euclid_wrong_operands_select(i32 %0) { define <2 x i32> @rem_euclid_vec(<2 x i32> %0) { ; CHECK-LABEL: @rem_euclid_vec( -; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %rem = srem <2 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll index e5b4fe5051e1035..621d278dee6e5f5 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll @@ -215,7 +215,7 @@ define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1 define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @inf_loop_partial_undef( -; CHECK-NEXT: [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[T6:%.*]] = icmp sge <2 x i32> [[T5]], [[X:%.*]] ; CHECK-NEXT: [[AB:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[T7:%.*]] = select <2 x i1> [[AB]], <2 x i1> [[T6]], <2 x i1> diff --git a/llvm/test/Transforms/InstCombine/select-factorize.ll b/llvm/test/Transforms/InstCombine/select-factorize.ll index ab9d9f6b24754d5..dc0e3b54cca578f 100644 --- a/llvm/test/Transforms/InstCombine/select-factorize.ll +++ b/llvm/test/Transforms/InstCombine/select-factorize.ll @@ -101,7 +101,7 @@ define i1 @logic_and_logic_or_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -113,7 +113,7 @@ define <3 x i1> @logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b define <3 x i1> @logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector_poison1( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -127,7 +127,7 @@ define <3 x i1> @logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 ; CHECK-LABEL: @logic_and_logic_or_vector_poison2( ; CHECK-NEXT: [[AC:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[A:%.*]], <3 x i1> ; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> , <3 x i1> [[BC]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> splat (i1 true), <3 x i1> [[BC]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> %a, <3 x i1> @@ -138,7 +138,7 @@ define <3 x i1> @logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 define <3 x i1> @logic_and_logic_or_vector_poison3(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector_poison3( -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[BC]], <3 x i1> ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -265,7 +265,7 @@ define i1 @and_logic_and_logic_or_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @and_logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -277,7 +277,7 @@ define <3 x i1> @and_logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1 define <3 x i1> @and_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector_poison1( -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[BC]], <3 x i1> ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -289,7 +289,7 @@ define <3 x i1> @and_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, define <3 x i1> @and_logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -342,7 +342,7 @@ define i1 @and_and_logic_or_2(i1 %c, i1 %a, i1 %b) { define <3 x i1> @and_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = and <3 x i1> [[C:%.*]], [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -354,7 +354,7 @@ define <3 x i1> @and_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) define <3 x i1> @and_and_logic_or_vector_poison(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_and_logic_or_vector_poison( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = and <3 x i1> [[C:%.*]], [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -480,7 +480,7 @@ define i1 @logic_or_logic_and_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> , <3 x i1> %a @@ -492,7 +492,7 @@ define <3 x i1> @logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b define <3 x i1> @logic_or_logic_and_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector_poison1( ; CHECK-NEXT: [[AC:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[A:%.*]] -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> [[BC]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -517,7 +517,7 @@ define <3 x i1> @logic_or_logic_and_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 define <3 x i1> @logic_or_logic_and_vector_poison3(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector_poison3( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> , <3 x i1> %a @@ -644,7 +644,7 @@ define i1 @or_logic_or_logic_and_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @or_logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @or_logic_or_logic_and_vector( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = or <3 x i1> %c, %a @@ -668,7 +668,7 @@ define <3 x i1> @or_logic_or_logic_and_vector_poison1(<3 x i1> %c, <3 x i1> %a, define <3 x i1> @or_logic_or_logic_and_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @or_logic_or_logic_and_vector_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = or <3 x i1> %c, %a diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll index fe794a33ece2d2c..d62edf2fe393309 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll @@ -17,7 +17,7 @@ define i32 @test_eq(i32 %x) { define <2 x i32> @test_eq_vect(<2 x i32> %x) { ; CHECK-LABEL: @test_eq_vect( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl.mask = and <2 x i32> %x, @@ -41,7 +41,7 @@ define i32 @test_ne(i32 %x) { define <2 x i32> @test_ne_vect(<2 x i32> %x) { ; CHECK-LABEL: @test_ne_vect( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl.mask = and <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll index a57a7c5e32e4942..219a66c314a07d8 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll @@ -40,8 +40,8 @@ define i32 @test35(i32 %x) { define <2 x i32> @test35vec(<2 x i32> %x) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sge <2 x i32> %x, @@ -77,7 +77,7 @@ define i32 @test36(i32 %x) { define <2 x i32> @test36vec(<2 x i32> %x) { ; CHECK-LABEL: @test36vec( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp slt <2 x i32> %x, @@ -98,8 +98,8 @@ define i32 @test37(i32 %x) { define <2 x i32> @test37vec(<2 x i32> %x) { ; CHECK-LABEL: @test37vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 1), <2 x i32> splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sgt <2 x i32> %x, @@ -122,9 +122,9 @@ define i32 @test65(i64 %x) { define <2 x i32> @test65vec(<2 x i64> %x) { ; CHECK-LABEL: @test65vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 16) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i64> %x, @@ -148,9 +148,9 @@ define i32 @test66(i64 %x) { define <2 x i32> @test66vec(<2 x i64> %x) { ; CHECK-LABEL: @test66vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 4294967296) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i64> %x, @@ -164,7 +164,7 @@ define <2 x i32> @test66vec_scalar_and(i64 %x) { ; CHECK-LABEL: @test66vec_scalar_and( ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and i64 %x, 4294967296 @@ -188,9 +188,9 @@ define i32 @test67(i16 %x) { define <2 x i32> @test67vec(<2 x i16> %x) { ; CHECK-LABEL: @test67vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 4) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i16> %x, @@ -214,9 +214,9 @@ define i32 @test71(i32 %x) { define <2 x i32> @test71vec(<2 x i32> %x) { ; CHECK-LABEL: @test71vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i32> %x, @@ -240,9 +240,9 @@ define i32 @test72(i32 %x) { define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i32> %x, @@ -266,9 +266,9 @@ define i32 @test73(i32 %x) { define <2 x i32> @test73vec(<2 x i32> %x) { ; CHECK-LABEL: @test73vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = trunc <2 x i32> %x to <2 x i8> @@ -290,8 +290,8 @@ define i32 @test74(i32 %x) { define <2 x i32> @test74vec(<2 x i32> %x) { ; CHECK-LABEL: @test74vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = icmp sgt <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/select-masked_load.ll b/llvm/test/Transforms/InstCombine/select-masked_load.ll index 51525e5ee83467b..b6bac612d6f9b24 100644 --- a/llvm/test/Transforms/InstCombine/select-masked_load.ll +++ b/llvm/test/Transforms/InstCombine/select-masked_load.ll @@ -38,7 +38,7 @@ define <4 x i32> @masked_load_and_zero_inactive_3(ptr %ptr, <4 x i1> %mask, <4 x ; Remove redundant select when its mask doesn't overlap with the load mask. define <4 x i32> @masked_load_and_zero_inactive_4(ptr %ptr, <4 x i1> %inv_mask) { ; CHECK-LABEL: @masked_load_and_zero_inactive_4( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: ret <4 x i32> [[LOAD]] ; @@ -51,7 +51,7 @@ define <4 x i32> @masked_load_and_zero_inactive_4(ptr %ptr, <4 x i1> %inv_mask) ; As above but reuse the load's existing passthrough. define <4 x i32> @masked_load_and_zero_inactive_5(ptr %ptr, <4 x i1> %inv_mask) { ; CHECK-LABEL: @masked_load_and_zero_inactive_5( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: ret <4 x i32> [[LOAD]] ; @@ -64,7 +64,7 @@ define <4 x i32> @masked_load_and_zero_inactive_5(ptr %ptr, <4 x i1> %inv_mask) ; No transform when the load's passthrough cannot be reused or altered. define <4 x i32> @masked_load_and_zero_inactive_6(ptr %ptr, <4 x i1> %inv_mask, <4 x i32> %passthrough) { ; CHECK-LABEL: @masked_load_and_zero_inactive_6( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> [[PASSTHROUGH:%.*]]) ; CHECK-NEXT: [[MASKED:%.*]] = select <4 x i1> [[INV_MASK]], <4 x i32> zeroinitializer, <4 x i32> [[LOAD]] ; CHECK-NEXT: ret <4 x i32> [[MASKED]] @@ -91,7 +91,7 @@ define <4 x i32> @masked_load_and_zero_inactive_7(ptr %ptr, <4 x i1> %mask1, <4 ; load's inactive lanes and thus the load's passthrough takes effect. define <4 x float> @masked_load_and_zero_inactive_8(ptr %ptr, <4 x i1> %inv_mask, <4 x i1> %cond) { ; CHECK-LABEL: @masked_load_and_zero_inactive_8( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[PG:%.*]] = and <4 x i1> [[COND:%.*]], [[MASK]] ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[PG]], <4 x float> zeroinitializer) ; CHECK-NEXT: ret <4 x float> [[LOAD]] diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index 0c7624018cb02c3..4495e7f6ed067a5 100644 --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -22,7 +22,7 @@ define i32 @and_lshr_and(i32 %arg) { define <2 x i32> @and_lshr_and_splatvec(<2 x i32> %arg) { ; CHECK-LABEL: @and_lshr_and_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T4]] @@ -111,7 +111,7 @@ define i32 @and_and(i32 %arg) { define <2 x i32> @and_and_splatvec(<2 x i32> %arg) { ; CHECK-LABEL: @and_and_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] @@ -190,7 +190,7 @@ define i32 @f_var0_commutative_and(i32 %arg, i32 %arg1) { define <2 x i32> @f_var0_splatvec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var0_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> @@ -270,7 +270,7 @@ define i32 @f_var1_commutative_and(i32 %arg, i32 %arg1) { define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> @@ -285,7 +285,7 @@ define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) { define <3 x i32> @f_var1_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var1_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32> @@ -321,11 +321,11 @@ define i32 @f_var2(i32 %arg, i32 %arg1) { define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var2_splatvec( -; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 1) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1) +; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t = and <2 x i32> %arg, @@ -341,8 +341,8 @@ define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1) +; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t = and <2 x i32> %arg, ; mask is not splat @@ -414,8 +414,8 @@ define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %ar ; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = lshr <2 x i32> [[ARG]], [[ARG2:%.*]] -; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], -; CHECK-NEXT: [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 1) +; CHECK-NEXT: [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T6]] ; %t = and <2 x i32> %arg, %arg1 diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index e0306972e48e2c2..ebea5bf2eadf443 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -223,9 +223,9 @@ define i1 @andn_or_cmp_2_partial_logical_commute(i16 %a, i16 %b) { define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_or( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], -; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1) +; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[COND]], <2 x i1> [[OR]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -240,9 +240,9 @@ define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) { define <2 x i1> @not_logical_or2(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_or2( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], -; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1) +; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[OR]], <2 x i1> [[COND]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -764,10 +764,10 @@ define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) { define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_and( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1) ; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> , <2 x i1> [[AND]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> splat (i1 true), <2 x i1> [[AND]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cond = icmp ult <2 x i32> %a, @@ -781,10 +781,10 @@ define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_and2( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1) ; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cond = icmp ult <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll index 28984282511924b..433fafc7e553b84 100644 --- a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll +++ b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll @@ -4,8 +4,8 @@ define <2 x i8> @select_icmp_insertelement_eq(<2 x i8> %x, <2 x i8> %y, i8 %i) { ; CHECK-LABEL: define <2 x i8> @select_icmp_insertelement_eq( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], i8 [[I:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> , i8 0, i8 [[I]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> splat (i8 2), i8 0, i8 [[I]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[INSERT]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; @@ -18,8 +18,8 @@ define <2 x i8> @select_icmp_insertelement_eq(<2 x i8> %x, <2 x i8> %y, i8 %i) { define <2 x i8> @select_icmp_insertelement_ne(<2 x i8> %x, <2 x i8> %y, i8 %i) { ; CHECK-LABEL: define <2 x i8> @select_icmp_insertelement_ne( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], i8 [[I:%.*]]) { -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> , i8 0, i8 [[I]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> splat (i8 2), i8 0, i8 [[I]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP_NOT]], <2 x i8> [[INSERT]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; @@ -32,8 +32,8 @@ define <2 x i8> @select_icmp_insertelement_ne(<2 x i8> %x, <2 x i8> %y, i8 %i) { define <2 x i8> @select_icmp_shufflevector_identity(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_shufflevector_identity( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 2), <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; %cmp = icmp eq <2 x i8> %y, @@ -45,7 +45,7 @@ define <2 x i8> @select_icmp_shufflevector_identity(<2 x i8> %x, <2 x i8> %y) { define <4 x i8> @select_icmp_shufflevector_select(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) { ; CHECK-LABEL: define <4 x i8> @select_icmp_shufflevector_select( ; CHECK-SAME: <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i8> [[Z:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8> [[Z]], <4 x i8> , <4 x i32> ; CHECK-NEXT: [[RETVAL:%.*]] = select <4 x i1> [[CMP]], <4 x i8> [[SHUFFLE]], <4 x i8> [[X]] ; CHECK-NEXT: ret <4 x i8> [[RETVAL]] @@ -59,7 +59,7 @@ define <4 x i8> @select_icmp_shufflevector_select(<4 x i8> %x, <4 x i8> %y, <4 x define <2 x i8> @select_icmp_shufflevector_lanecrossing(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_shufflevector_lanecrossing( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> poison, <2 x i32> ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[SHUFFLE]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] @@ -75,7 +75,7 @@ declare <2 x i8> @fn(<2 x i8>) speculatable define <2 x i8> @select_icmp_call_possibly_lanecrossing(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_call_possibly_lanecrossing( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[CALL:%.*]] = call <2 x i8> @fn(<2 x i8> [[Y]]) ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[CALL]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] @@ -201,7 +201,7 @@ define float @select_fcmp_fadd_une_zero(float %x, float %y) { define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp oeq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp oeq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -215,7 +215,7 @@ define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec(<2 x float> %x, <2 x float define <2 x float> @select_fcmp_fadd_une_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_une_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp une <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp une <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -229,7 +229,7 @@ define <2 x float> @select_fcmp_fadd_une_not_zero_vec(<2 x float> %x, <2 x float define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan ueq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan ueq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -243,7 +243,7 @@ define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec(<2 x float> %x, <2 x define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan one <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan one <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -257,7 +257,7 @@ define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec(<2 x float> %x, <2 x define <2 x float> @select_fcmp_fadd_ueq_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_ueq_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp ueq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp ueq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -271,7 +271,7 @@ define <2 x float> @select_fcmp_fadd_ueq_vec(<2 x float> %x, <2 x float> %y) { define <2 x float> @select_fcmp_fadd_one_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_one_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp one <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp one <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll index 1647233595b37e2..30e763ccea59001 100644 --- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -22,8 +22,8 @@ define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -37,7 +37,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], splat (i32 1) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -50,8 +50,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison2( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -64,8 +64,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison3(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison3( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -121,8 +121,8 @@ define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_32_0_or_8_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 8) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -178,8 +178,8 @@ define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_ne_0_and_4096_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -234,7 +234,7 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_4096_0_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -290,7 +290,7 @@ define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) { define <2 x i32> @select_icmp_eq_0_and_1_or_1_vec(<2 x i64> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -392,9 +392,9 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_ne_0_and_32_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 4096) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -531,7 +531,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_vector_of_2s( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x i32> [[Y]], <2 x i32> [[OR]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -612,8 +612,8 @@ define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) { define <2 x i64> @select_icmp_x_and_8_ne_0_y_or_8_vec(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[OR_Y:%.*]] = or <2 x i64> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[OR_Y]] @@ -692,8 +692,8 @@ define i32 @test68(i32 %x, i32 %y) { define <2 x i32> @test68vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test68vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -750,9 +750,9 @@ define i32 @test69(i32 %x, i32 %y) { define <2 x i32> @test69vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test69vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -1563,8 +1563,8 @@ define i8 @set_bits_extra_use2(i8 %x, i1 %b) { define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b) { ; CHECK-LABEL: @clear_bits( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37) +; CHECK-NEXT: [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; @@ -1579,7 +1579,7 @@ define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b) { define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b) { ; CHECK-LABEL: @clear_bits_not_inverse_constant( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[X]], splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> [[AND]], <2 x i8> [[OR]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; @@ -1591,9 +1591,9 @@ define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b) { define <2 x i8> @clear_bits_extra_use1(<2 x i8> %x, i1 %b) { ; CHECK-LABEL: @clear_bits_extra_use1( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[AND]]) -; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 7d62b419424405b..8b394afc70aca59 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -34,7 +34,7 @@ define i1 @trueval_is_true(i1 %C, i1 %X) { define <2 x i1> @trueval_is_true_vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @trueval_is_true_vec( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> , <2 x i1> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %R = select <2 x i1> %C, <2 x i1> , <2 x i1> %X @@ -89,7 +89,7 @@ define i1 @test9(i1 %C, i1 %X) { define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> [[X:%.*]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -119,8 +119,8 @@ define i1 @test10(i1 %C, i1 %X) { define <2 x i1> @test10vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @test10vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> , <2 x i1> [[X:%.*]] +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> splat (i1 true), <2 x i1> [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> @@ -156,7 +156,7 @@ define i1 @test24(i1 %a, i1 %b) { define <2 x i1> @test24vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @test24vec( -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %c = select <2 x i1> %a, <2 x i1> %a, <2 x i1> %b @@ -176,7 +176,7 @@ define i1 @test62(i1 %A, i1 %B) { define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -198,8 +198,8 @@ define i1 @test63(i1 %A, i1 %B) { define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %not = xor <2 x i1> %A, @@ -276,7 +276,7 @@ define i32 @test12b(i1 %cond, i32 %a) { define <2 x i32> @test12bvec(<2 x i1> %cond, <2 x i32> %a) { ; CHECK-LABEL: @test12bvec( -; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], +; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], splat (i1 true) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[NOT_COND]] to <2 x i32> ; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]] ; CHECK-NEXT: ret <2 x i32> [[D]] @@ -1447,7 +1447,7 @@ define i32 @select_icmp_slt0_xor(i32 %x) { define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) { ; CHECK-LABEL: @select_icmp_slt0_xor_vec( -; CHECK-NEXT: [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[X_XOR]] ; %cmp = icmp slt <2 x i32> %x, zeroinitializer @@ -1531,7 +1531,7 @@ define <4 x float> @PR33721(<4 x float> %w) { ; CHECK-LABEL: @PR33721( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[W:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> , <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> splat (float 0xFFFFFFFFE0000000), <4 x float> zeroinitializer ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; entry: @@ -1603,7 +1603,7 @@ define i32 @test_shl_zext_bool(i1 %t) { define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) { ; CHECK-LABEL: @test_shl_zext_bool_splat( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = select <2 x i1> %t, <2 x i32> , <2 x i32> zeroinitializer @@ -1644,7 +1644,7 @@ define float @copysign1_fmf(float %x) { define <2 x float> @copysign2(<2 x float> %x) { ; CHECK-LABEL: @copysign2( ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1668,7 +1668,7 @@ define float @copysign3(float %x) { define <2 x float> @copysign_vec_poison(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1679,7 +1679,7 @@ define <2 x float> @copysign_vec_poison(<2 x float> %x) { define <2 x float> @copysign_vec_poison1(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1690,7 +1690,7 @@ define <2 x float> @copysign_vec_poison1(<2 x float> %x) { define <2 x float> @copysign_vec_poison3(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison3( -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1739,7 +1739,7 @@ define <2 x float> @copysign_type_mismatch2(<2 x float> %x) { ; CHECK-LABEL: @copysign_type_mismatch2( ; CHECK-NEXT: [[I:%.*]] = bitcast <2 x float> [[X:%.*]] to i64 ; CHECK-NEXT: [[ISPOS:%.*]] = icmp sgt i64 [[I]], -1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[ISPOS]], <2 x float> , <2 x float> +; CHECK-NEXT: [[R:%.*]] = select i1 [[ISPOS]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float -1.000000e+00) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to i64 @@ -2762,8 +2762,8 @@ define i8 @select_replacement_add_eq(i8 %x, i8 %y) { define <2 x i8> @select_replacement_add_eq_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 2), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, @@ -2799,7 +2799,7 @@ define <2 x i8> @select_replacement_add_eq_vec_poison(<2 x i8> %x, <2 x i8> %y) define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec_undef( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; @@ -2811,7 +2811,7 @@ define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @select_replacement_add_eq_vec_undef_okay(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; @@ -3370,7 +3370,7 @@ define i32 @select_cond_zext_cond(i1 %cond, i32 %b) { define <2 x i32> @select_cond_zext_cond_vec(<2 x i1> %cond, <2 x i32> %b) { ; CHECK-LABEL: @select_cond_zext_cond_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> , <2 x i32> [[B:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 1), <2 x i32> [[B:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %zext = zext <2 x i1> %cond to <2 x i32> @@ -3390,7 +3390,7 @@ define i32 @select_cond_sext_cond(i1 %cond, i32 %b) { define <2 x i32> @select_cond_sext_cond_vec(<2 x i1> %cond, <2 x i32> %b) { ; CHECK-LABEL: @select_cond_sext_cond_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> , <2 x i32> [[B:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[B:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %sext = sext <2 x i1> %cond to <2 x i32> @@ -3582,7 +3582,7 @@ define i32 @not_clamp_umin2(i32 %x) { define <2 x i8> @clamp_umaxval(<2 x i8> %x) { ; CHECK-LABEL: @clamp_umaxval( -; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 -2)) ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, @@ -3667,7 +3667,7 @@ define i8 @not_clamp_smin2(i8 %x) { define <2 x i8> @clamp_smaxval(<2 x i8> %x) { ; CHECK-LABEL: @clamp_smaxval( -; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 126)) ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll index d8f945b8d1b32f8..f22b5a4fd3c5f20 100644 --- a/llvm/test/Transforms/InstCombine/select_meta.ll +++ b/llvm/test/Transforms/InstCombine/select_meta.ll @@ -371,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) { define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @select_fdiv( -; CHECK-NEXT: [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> , !prof [[PROF0]], !unpredictable [[META2]] +; CHECK-NEXT: [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> splat (float 1.000000e+00), !prof [[PROF0]], !unpredictable [[META2]] ; CHECK-NEXT: [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll index a3c8d3393d04faa..e6e1ea689521d8a 100644 --- a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll @@ -187,8 +187,8 @@ define i32 @shl_nsw_nuw_add_nsw_nuw(i32 %NBits) { define <2 x i32> @shl_add_vec(<2 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <2 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <2 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %setbit = shl <2 x i32> , %NBits @@ -198,8 +198,8 @@ define <2 x i32> @shl_add_vec(<2 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison0( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits @@ -209,8 +209,8 @@ define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison1( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits @@ -220,8 +220,8 @@ define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison2(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison2( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits diff --git a/llvm/test/Transforms/InstCombine/set.ll b/llvm/test/Transforms/InstCombine/set.ll index f44ac83f7f59161..214e9adb9bd545c 100644 --- a/llvm/test/Transforms/InstCombine/set.ll +++ b/llvm/test/Transforms/InstCombine/set.ll @@ -144,7 +144,7 @@ define i1 @test13(i1 %A, i1 %B) { define <2 x i1> @test13vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -165,7 +165,7 @@ define i1 @test14(i1 %A, i1 %B) { define <3 x i1> @test14vec(<3 x i1> %A, <3 x i1> %B) { ; CHECK-LABEL: @test14vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: ret <3 x i1> [[C]] ; %C = icmp eq <3 x i1> %A, %B @@ -257,7 +257,7 @@ define i1 @xor_of_icmps_neg_to_ne(i64 %a) { define <2 x i1> @xor_of_icmps_to_ne_vector(<2 x i64> %a) { ; CHECK-LABEL: @xor_of_icmps_to_ne_vector( -; CHECK-NEXT: [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[XOR]] ; %b = icmp sgt <2 x i64> %a, @@ -419,7 +419,7 @@ define i32 @test20(i32 %A) { define <2 x i32> @test20vec(<2 x i32> %A) { ; CHECK-LABEL: @test20vec( -; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %B = and <2 x i32> %A, @@ -442,8 +442,8 @@ define i32 @test21(i32 %a) { define <2 x i32> @test21vec(<2 x i32> %a) { ; CHECK-LABEL: @test21vec( -; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], +; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_6_LOBIT]] ; %tmp.6 = and <2 x i32> %a, @@ -490,8 +490,8 @@ define i32 @test23(i32 %a) { define <2 x i32> @test23vec(<2 x i32> %a) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], +; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_3]] ; %tmp.1 = and <2 x i32> %a, @@ -516,9 +516,9 @@ define i32 @test24(i32 %a) { define <2 x i32> @test24vec(<2 x i32> %a) { ; CHECK-LABEL: @test24vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_3]] ; %tmp1 = and <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll index b992460d0be6981..516f1a2bfd5c533 100644 --- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll +++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll @@ -55,7 +55,7 @@ define i16 @n2(i8 %x) { define <2 x i16> @t3_vec(<2 x i8> %x) { ; CHECK-LABEL: @t3_vec( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: call void @usevec(<2 x i8> [[A]]) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll index a554f2b28d6f299..ee3c52259f930a5 100644 --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -143,8 +143,8 @@ define i32 @test10(i32 %i) { define <2 x i32> @test10_vec(<2 x i32> %i) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], splat (i32 30) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %A = trunc <2 x i32> %i to <2 x i8> @@ -338,8 +338,8 @@ define <2 x i32> @smear_set_bit_vec_use1(<2 x i32> %x) { ; CHECK-LABEL: @smear_set_bit_vec_use1( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i5> ; CHECK-NEXT: call void @use_vec(<2 x i5> [[T]]) -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X]], -; CHECK-NEXT: [[S:%.*]] = ashr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X]], splat (i32 27) +; CHECK-NEXT: [[S:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[S]] ; %t = trunc <2 x i32> %x to <2 x i5> diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll index 016f877a9efb518..81cbc2ac23b5f2c 100644 --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -289,7 +289,7 @@ define i8 @lshr_exact_add_negative_shift_positive_extra_use(i8 %x) { define <2 x i9> @lshr_exact_add_negative_shift_positive_vec(<2 x i9> %x) { ; CHECK-LABEL: @lshr_exact_add_negative_shift_positive_vec( -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> splat (i9 -256), [[X:%.*]] ; CHECK-NEXT: ret <2 x i9> [[R]] ; %a = add <2 x i9> %x, @@ -301,8 +301,8 @@ define <2 x i9> @lshr_exact_add_negative_shift_positive_vec(<2 x i9> %x) { define <2 x i9> @lshr_exact_add_negative_shift_lzcnt(<2 x i9> %x) { ; CHECK-LABEL: @lshr_exact_add_negative_shift_lzcnt( -; CHECK-NEXT: [[A:%.*]] = add <2 x i9> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> , [[A]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i9> [[X:%.*]], splat (i9 -7) +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> splat (i9 4), [[A]] ; CHECK-NEXT: ret <2 x i9> [[R]] ; %a = add <2 x i9> %x, @@ -372,7 +372,7 @@ define i8 @ashr_exact_add_negative_shift_negative_extra_use(i8 %x) { define <2 x i7> @ashr_exact_add_negative_shift_negative_vec(<2 x i7> %x) { ; CHECK-LABEL: @ashr_exact_add_negative_shift_negative_vec( -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> splat (i7 -64), [[X:%.*]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; %a = add <2 x i7> %x, @@ -384,8 +384,8 @@ define <2 x i7> @ashr_exact_add_negative_shift_negative_vec(<2 x i7> %x) { define <2 x i7> @ashr_exact_add_negative_leading_ones_vec(<2 x i7> %x) { ; CHECK-LABEL: @ashr_exact_add_negative_leading_ones_vec( -; CHECK-NEXT: [[A:%.*]] = add <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> , [[A]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i7> [[X:%.*]], splat (i7 -5) +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> splat (i7 -4), [[A]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; %a = add <2 x i7> %x, @@ -410,9 +410,9 @@ define i32 @shl_nsw_add_negative(i32 %x) { define <2 x i8> @shl_nuw_add_negative_splat_uses(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @shl_nuw_add_negative_splat_uses( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -2) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i8> , [[X]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i8> splat (i8 3), [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll index c4260f4cb2bf885..c134833630338c3 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll @@ -165,7 +165,7 @@ define i1 @n4(i32 %x, i32 %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok. define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t5_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 16) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T5]] @@ -183,9 +183,9 @@ define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 15, not ok to fold. define <2 x i1> @n6_vec(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @n6_vec( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16) ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] ; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> @@ -224,9 +224,9 @@ define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 48, not ok to fold. define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) { ; CHECK-LABEL: @n8_vec( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16) ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]] ; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc nuw nsw <2 x i64> [[T3]] to <2 x i32> @@ -335,7 +335,7 @@ define i1 @t10_shift_by_one(i32 %x, i64 %y, i32 %len) { ; A mix of those conditions is ok. define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t11_zero_and_almost_bitwidth( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> @@ -357,7 +357,7 @@ define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i } define <2 x i1> @n12_bad(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @n12_bad( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> @@ -414,7 +414,7 @@ define i1 @t14_x_is_one(i32 %x, i32 %len) { define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t15_vec_x_is_one_or_zero( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 48) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T5]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll index 6e9552e2af4ccee..6f4e78e9f91a0f0 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll @@ -38,7 +38,7 @@ define i1 @t0_const_after_fold_lshr_shl_ne(i32 %x, i64 %y, i32 %len) { define <2 x i1> @t1_vec_splat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer @@ -363,7 +363,7 @@ define i1 @t10_constants(i32 %x, i64 %y) { define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @t11_constants_vec_splat( ; CHECK-NEXT: [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 26) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] @@ -378,7 +378,7 @@ define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) { define <2 x i1> @t12_constants_vec_nonsplat(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @t12_constants_vec_nonsplat( ; CHECK-NEXT: [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 28) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll index 00a19e4962e6ca0..070a3b033021245 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll @@ -119,7 +119,7 @@ define i1 @t6_const_shl_lshr_ne(i32 %x, i32 %y, i32 %shamt0, i32 %shamt1) { define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] @@ -171,7 +171,7 @@ define <3 x i1> @t10_const_lshr_shl_ne_vec_poison1(<3 x i32> %x, <3 x i32> %y) { } define <3 x i1> @t11_const_lshr_shl_ne_vec_poison2(<3 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index 9efc30cc9d916eb..78ac4cbabde7d92 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[T5:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index c31b6ed3ea2ba90..937345b00c03390 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 073013b34a3baa5..0491ce1def8488d 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -30,7 +30,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[X_TR:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16> -; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[X_TR]], +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[X_TR]], splat (i16 8) ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll index 6bbe4c5151e4583..b4c606f037d561a 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll @@ -24,7 +24,7 @@ define i32 @t0(i32 %x, i32 %y) { define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> , %y @@ -36,7 +36,7 @@ define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t2_vec_nonsplat( -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> , %y diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll index 593a22bec6490bc..ab8d98a9523ba89 100644 --- a/llvm/test/Transforms/InstCombine/shift-logic.ll +++ b/llvm/test/Transforms/InstCombine/shift-logic.ll @@ -46,7 +46,7 @@ define i16 @shl_or(i16 %x, i16 %py) { define <2 x i16> @shl_or_poison(<2 x i16> %x, <2 x i16> %py) { ; CHECK-LABEL: @shl_or_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], splat (i16 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i16> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = or <2 x i16> [[TMP1]], [[TMP2]] @@ -102,7 +102,7 @@ define i64 @lshr_and(i64 %x, i64 %py) { define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_and_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] @@ -117,8 +117,8 @@ define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) { define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @lshr_or( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], splat (i32 12) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], splat (i32 7) ; CHECK-NEXT: [[SH1:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SH1]] ; @@ -130,9 +130,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) { define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) { ; CHECK-LABEL: @lshr_xor( -; CHECK-NEXT: [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[Y]], +; CHECK-NEXT: [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], splat (i16 42) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], splat (i16 12) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[Y]], splat (i16 7) ; CHECK-NEXT: [[SH1:%.*]] = xor <8 x i16> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i16> [[SH1]] ; @@ -146,8 +146,8 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) { define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) { ; CHECK-LABEL: @ashr_and( ; CHECK-NEXT: [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i8> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SH1:%.*]] = and <16 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <16 x i8> [[SH1]] ; @@ -160,8 +160,8 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) { define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @ashr_or( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 12) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], splat (i64 7) ; CHECK-NEXT: [[SH1:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; @@ -281,7 +281,7 @@ define i64 @lshr_mul_nuw_nsw(i64 %0) { define <4 x i32> @lshr_mul_vector(<4 x i32> %0) { ; CHECK-LABEL: @lshr_mul_vector( -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], splat (i32 13) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %2 = mul nuw <4 x i32> %0, @@ -394,7 +394,7 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @shl_add_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @shl_add_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] @@ -436,7 +436,7 @@ define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @lshr_add_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_add_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i64> [[Y]], [[SH0]] ; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], @@ -492,7 +492,7 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @shl_sub_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @shl_sub_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]] @@ -534,7 +534,7 @@ define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @lshr_sub_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_sub_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i64> [[Y]], [[SH0]] ; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index 7c35718601ba7f6..fa58de3528d37b3 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -486,7 +486,7 @@ define i8 @shl_lshr_demand4(i8 %x) { define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> splat (i8 37), [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -501,7 +501,7 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_undef_left( ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], splat (i8 2) ; CHECK-NEXT: [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -515,7 +515,7 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_undef_right( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] @@ -530,7 +530,7 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] @@ -649,8 +649,8 @@ define i8 @lshr_shl_demand4(i8 %x) { define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> splat (i8 -76), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -663,9 +663,9 @@ define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_undef_left( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -679,8 +679,8 @@ define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_undef_right( ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], splat (i8 2) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -693,9 +693,9 @@ define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_left( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -709,7 +709,7 @@ define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_nonuniform_vec_right(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_right( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101. 0b0000_1101 diff --git a/llvm/test/Transforms/InstCombine/shift-sra.ll b/llvm/test/Transforms/InstCombine/shift-sra.ll index 82bce5f02554ee9..d631da0d444adbc 100644 --- a/llvm/test/Transforms/InstCombine/shift-sra.ll +++ b/llvm/test/Transforms/InstCombine/shift-sra.ll @@ -143,7 +143,7 @@ define i32 @ashr_overshift(i32 %x) { define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @ashr_ashr_splat_vec( -; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 12) ; CHECK-NEXT: ret <2 x i32> [[SH2]] ; %sh1 = ashr <2 x i32> %x, @@ -155,7 +155,7 @@ define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) { define <2 x i32> @ashr_overshift_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @ashr_overshift_splat_vec( -; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[SH2]] ; %sh1 = ashr <2 x i32> %x, @@ -180,7 +180,7 @@ define i32 @hoist_ashr_ahead_of_sext_1(i8 %x) { define <2 x i32> @hoist_ashr_ahead_of_sext_1_splat(<2 x i8> %x) { ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_1_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -206,7 +206,7 @@ define i32 @hoist_ashr_ahead_of_sext_2(i8 %x) { define <2 x i32> @hoist_ashr_ahead_of_sext_2_splat(<2 x i8> %x) { ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_2_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 986e1073c638913..d2ee97f39123b0d 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -235,8 +235,8 @@ define i1 @test17(i32 %A) { define <2 x i1> @test17vec(<2 x i32> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -8) +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], splat (i32 9872) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i32> %A, @@ -267,7 +267,7 @@ define i1 @test19(i32 %A) { define <2 x i1> @test19vec(<2 x i32> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -288,7 +288,7 @@ define i1 @test19a(i32 %A) { define <2 x i1> @test19a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -433,7 +433,7 @@ entry: define <2 x i32> @test29_uniform(<2 x i64> %d18) { ; CHECK-LABEL: @test29_uniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], splat (i64 63) ; CHECK-NEXT: [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[I101]] ; @@ -523,7 +523,7 @@ define i1 @test33(i32 %X) { define <2 x i1> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 16777216) ; CHECK-NEXT: [[I2:%.*]] = icmp ne <2 x i32> [[I1_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[I2]] ; @@ -594,7 +594,7 @@ define i64 @test37(i128 %A, i32 %B) { define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], splat (i32 17) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -615,8 +615,8 @@ define i32 @test38(i32 %x) { define <2 x i32> @test38_uniform(<2 x i32> %x) { ; CHECK-LABEL: @test38_uniform( -; CHECK-NEXT: [[REM1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i32> , [[REM1]] +; CHECK-NEXT: [[REM1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i32> splat (i32 1), [[REM1]] ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; %rem = srem <2 x i32> %x, @@ -627,7 +627,7 @@ define <2 x i32> @test38_uniform(<2 x i32> %x) { define <3 x i32> @test38_nonuniform(<3 x i32> %x) { ; CHECK-LABEL: @test38_nonuniform( ; CHECK-NEXT: [[REM1:%.*]] = and <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i32> , [[REM1]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i32> splat (i32 1), [[REM1]] ; CHECK-NEXT: ret <3 x i32> [[SHL]] ; %rem = srem <3 x i32> %x, @@ -685,7 +685,7 @@ define i32 @test42(i32 %a, i32 %b) { define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 12), [[B:%.*]] ; CHECK-NEXT: [[DIV2:%.*]] = lshr <2 x i32> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[DIV2]] ; @@ -767,7 +767,7 @@ define i32 @test46(i32 %a) { define <2 x i32> @test46_splat_vec(<2 x i32> %a) { ; CHECK-LABEL: @test46_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %y = ashr exact <2 x i32> %a, @@ -791,7 +791,7 @@ define i8 @test47(i8 %a) { define <2 x i8> @test47_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @test47_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[Z]] ; %y = lshr exact <2 x i8> %a, @@ -827,7 +827,7 @@ define i32 @test48_nuw_nsw(i32 %x) { define <2 x i32> @test48_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test48_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = lshr exact <2 x i32> %x, @@ -863,7 +863,7 @@ define i32 @test49_nuw_nsw(i32 %x) { define <2 x i32> @test49_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test49_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = ashr exact <2 x i32> %x, @@ -888,7 +888,7 @@ define i32 @test50(i32 %x) { define <2 x i32> @test50_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test50_splat_vec( -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -913,7 +913,7 @@ define i32 @test51(i32 %x) { define <2 x i32> @test51_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -939,8 +939,8 @@ define i32 @test51_no_nuw(i32 %x) { define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], splat (i32 536870911) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl <2 x i32> %x, @@ -964,7 +964,7 @@ define i32 @test52(i32 %x) { define <2 x i32> @test52_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test52_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -988,7 +988,7 @@ define i32 @test53(i32 %x) { define <2 x i32> @test53_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test53_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -1013,8 +1013,8 @@ define i8 @test53_no_nuw(i8 %x) { define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @test53_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 2) +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 124) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %A = shl <2 x i8> %x, @@ -1036,8 +1036,8 @@ define i32 @test54(i32 %x) { define <2 x i32> @test54_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test54_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %shr2 = lshr <2 x i32> %x, @@ -1098,8 +1098,8 @@ define i32 @test58(i32 %x) { define <2 x i32> @test58_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test58_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[OR]] ; %shr = ashr <2 x i32> %x, @@ -1213,8 +1213,8 @@ define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) { define <2 x i65> @test_63(<2 x i64> %t) { ; CHECK-LABEL: @test_63( ; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> [[T:%.*]] to <2 x i65> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], splat (i65 33) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], splat (i65 33) ; CHECK-NEXT: ret <2 x i65> [[B]] ; %a = zext <2 x i64> %t to <2 x i65> @@ -1235,7 +1235,7 @@ define i32 @test_shl_zext_bool(i1 %t) { define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) { ; CHECK-LABEL: @test_shl_zext_bool_splat( -; CHECK-NEXT: [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; %ext = zext <2 x i1> %t to <2 x i32> @@ -1293,7 +1293,7 @@ define i64 @shl_zext_extra_use(i32 %t) { define <2 x i64> @shl_zext_splat_vec(<2 x i32> %t) { ; CHECK-LABEL: @shl_zext_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], splat (i32 8) ; CHECK-NEXT: [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; @@ -1318,9 +1318,9 @@ define i64 @shl_zext_mul(i32 %t) { define <3 x i17> @shl_zext_mul_splat(<3 x i5> %t) { ; CHECK-LABEL: @shl_zext_mul_splat( -; CHECK-NEXT: [[MUL:%.*]] = mul <3 x i5> [[T:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <3 x i5> [[T:%.*]], splat (i5 13) ; CHECK-NEXT: [[EXT:%.*]] = zext <3 x i5> [[MUL]] to <3 x i17> -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i17> [[EXT]], +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i17> [[EXT]], splat (i17 12) ; CHECK-NEXT: ret <3 x i17> [[SHL]] ; %mul = mul <3 x i5> %t, @@ -1374,7 +1374,7 @@ define i64 @shl_zext_mul_extra_use2(i32 %t) { define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @ashr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, @@ -1394,7 +1394,7 @@ define @ashr_demanded_bits_splat2( %x) { define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @lshr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, @@ -1913,7 +1913,7 @@ define i64 @lshr_mul_negpow2_2(i64 %x) { define <2 x i32> @lshr_mul_negpow2_3(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_3( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 255) ; CHECK-NEXT: ret <2 x i32> [[A]] ; %a = mul <2 x i32> %x, @@ -1937,8 +1937,8 @@ define i32 @lshr_mul_negpow2_4(i32 %x) { define <2 x i32> @lshr_mul_negpow2_5(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_5( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i32> [[A]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65527) +; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i32> [[A]], splat (i32 8) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %a = mul <2 x i32> %x, @@ -1973,7 +1973,7 @@ define i8 @ashr_sdiv_pos(i8 %x) { define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 41) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1984,7 +1984,7 @@ define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) { define <2 x i8> @ashr_sdiv_neg_splat_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -2119,7 +2119,7 @@ define i6 @shl_or7_eq_shl7(i6 %x, i6 %c) { define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @lshr_vec_or7_eq_shl7( -; CHECK-NEXT: [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[Y]] ; %amt = or <2 x i8> %c, @@ -2130,7 +2130,7 @@ define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) { define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @ashr_vec_or7_eq_ashr7( -; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[Y]] ; %amt = or <2 x i8> %c, @@ -2141,7 +2141,7 @@ define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) { ; Negative test not bitwidth - 1 define <2 x i8> @ashr_vec_or6_fail(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @ashr_vec_or6_fail( -; CHECK-NEXT: [[AMT:%.*]] = or <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[AMT:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 6) ; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], [[AMT]] ; CHECK-NEXT: ret <2 x i8> [[Y]] ; diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll index daa495579659435..197b9c80658e606 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_negC_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_negC_eq( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll index dcc181945357da5..27542dad2fe37c9 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_signbit_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/shl-bo.ll b/llvm/test/Transforms/InstCombine/shl-bo.ll index 356c4a288f9e31f..c32ac2eacb25a13 100644 --- a/llvm/test/Transforms/InstCombine/shl-bo.ll +++ b/llvm/test/Transforms/InstCombine/shl-bo.ll @@ -20,10 +20,10 @@ define i8 @lshr_add(i8 %a, i8 %y) { define <2 x i8> @lshr_add_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_add_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 5) ; CHECK-NEXT: [[R2:%.*]] = add <2 x i8> [[Y:%.*]], [[B1]] -; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], +; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -32) ; CHECK-NEXT: ret <2 x i8> [[L]] ; %x = srem <2 x i8> %a, ; thwart complexity-based canonicalization @@ -50,10 +50,10 @@ define i8 @lshr_sub(i8 %a, i8 %y) { define <2 x i8> @lshr_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_sub_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: [[R2:%.*]] = sub <2 x i8> [[Y:%.*]], [[B1]] -; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], +; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[L]] ; %x = srem <2 x i8> %a, ; thwart complexity-based canonicalization @@ -79,8 +79,8 @@ define i8 @lshr_and(i8 %a, i8 %y) { define <2 x i8> @lshr_and_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 6) ; CHECK-NEXT: [[R2:%.*]] = and <2 x i8> [[Y:%.*]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[R2]] ; @@ -108,9 +108,9 @@ define i8 @lshr_or(i8 %a, i8 %y) { define <2 x i8> @lshr_or_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 4) +; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -16) ; CHECK-NEXT: [[L:%.*]] = or <2 x i8> [[Y_MASKED]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -138,9 +138,9 @@ define i8 @lshr_xor(i8 %a, i8 %y) { define <2 x i8> @lshr_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_xor_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) +; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -8) ; CHECK-NEXT: [[L:%.*]] = xor <2 x i8> [[Y_MASKED]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -199,9 +199,9 @@ define i8 @lshr_and_add(i8 %a, i8 %y) { define <2 x i8> @lshr_and_add_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_add_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 96) ; CHECK-NEXT: [[L:%.*]] = add <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -232,9 +232,9 @@ define i8 @lshr_and_sub(i8 %a, i8 %y) { define <2 x i8> @lshr_and_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_sub_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = sub <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -264,9 +264,9 @@ define i8 @lshr_and_and(i8 %a, i8 %y) { define <2 x i8> @lshr_and_and_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_and_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -329,9 +329,9 @@ define i8 @ashr_and_or_disjoint(i8 %a, i8 %y) { define <2 x i8> @lshr_and_or_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_or_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = or <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -361,9 +361,9 @@ define i8 @lshr_and_xor(i8 %a, i8 %y) { define <2 x i8> @lshr_and_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_xor_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = xor <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -525,8 +525,8 @@ define i32 @lshr_add_and_shl(i32 %x, i32 %y) { define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @lshr_add_and_shl_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 5) +; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4064) ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[X_MASK]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; @@ -540,7 +540,7 @@ define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 127) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], ; CHECK-NEXT: ret <2 x i32> [[TMP4]] @@ -583,8 +583,8 @@ define i32 @shl_add_and_lshr(i32 %x, i32 %y) { define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @shl_add_and_lshr_v2i32( -; CHECK-NEXT: [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 4) +; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[D:%.*]] = add <2 x i32> [[X_MASK]], [[C1]] ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -647,8 +647,8 @@ define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) { define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) { ; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd( -; CHECK-NEXT: [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], -; CHECK-NEXT: [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], +; CHECK-NEXT: [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], splat (i8 33) +; CHECK-NEXT: [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], splat (i8 -32) ; CHECK-NEXT: ret <16 x i8> [[VSHL_N]] ; %vsra_n = ashr <16 x i8> %in0, diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll index 08e6e745818489d..00b681a9b4254e4 100644 --- a/llvm/test/Transforms/InstCombine/shl-demand.ll +++ b/llvm/test/Transforms/InstCombine/shl-demand.ll @@ -95,9 +95,9 @@ define i32 @src_srem_shl_demand_max_mask_hit_demand(i32 %a0) { define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) { ; CHECK-LABEL: @src_srem_shl_mask_vector( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], splat (i32 29) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, @@ -108,9 +108,9 @@ define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) { define <2 x i32> @src_srem_shl_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @src_srem_shl_mask_vector_nonconstant( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[SREM]], [[A1:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/shl-sub.ll b/llvm/test/Transforms/InstCombine/shl-sub.ll index 962cf0562ae4e4e..6cf60a76da9fb16 100644 --- a/llvm/test/Transforms/InstCombine/shl-sub.ll +++ b/llvm/test/Transforms/InstCombine/shl-sub.ll @@ -48,7 +48,7 @@ define i64 @shl_sub_i64(i64 %x) { define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec( -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i64> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i64> splat (i64 -9223372036854775808), [[X:%.*]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -58,7 +58,7 @@ define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) { define <3 x i64> @shl_sub_i64_vec_poison(<3 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec_poison( -; CHECK-NEXT: [[R:%.*]] = lshr exact <3 x i64> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <3 x i64> splat (i64 -9223372036854775808), [[X:%.*]] ; CHECK-NEXT: ret <3 x i64> [[R]] ; %s = sub <3 x i64> , %x @@ -136,8 +136,8 @@ define i64 @shl_bad_sub_i64(i64 %x) { define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @shl_bad_sub_i64_vec( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i64> , [[S]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> splat (i64 53), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i64> splat (i64 1), [[S]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -147,8 +147,8 @@ define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) { define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @bad_shl_sub_i64_vec( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> , [[S]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> splat (i64 63), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> splat (i64 2), [[S]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -159,7 +159,7 @@ define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) { define <3 x i64> @shl_sub_i64_vec_undef_bad(<3 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec_undef_bad( ; CHECK-NEXT: [[S:%.*]] = sub <3 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl nuw <3 x i64> , [[S]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <3 x i64> splat (i64 1), [[S]] ; CHECK-NEXT: ret <3 x i64> [[R]] ; %s = sub <3 x i64> , %x @@ -191,8 +191,8 @@ define i32 @shl_const_op1_sub_const_op0(i32 %x) { define <2 x i32> @shl_const_op1_sub_const_op0_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_const_op1_sub_const_op0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> splat (i32 336), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %x diff --git a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll index 25b26770c366dbd..6d6bcb3c46c465d 100644 --- a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll +++ b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll @@ -116,7 +116,7 @@ define i1 @scalar_i8_shl_ugt_const(i8 %x) { define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -128,7 +128,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) { define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 131072) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %shl = shl <4 x i32> %x, @@ -138,7 +138,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) { define <4 x i1> @vector_4xi32_shl_ult_const_undef2(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef2( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 11) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -161,7 +161,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef3(<4 x i32> %x) { ; Check 'uge' predicate define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_uge_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -173,7 +173,7 @@ define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) { ; Check 'ule' predicate define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ule_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -185,7 +185,7 @@ define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) { ; Check 'ugt' predicate define <4 x i1> @vector_4xi32_shl_ugt_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ugt_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll index f573ff36d2cea4c..3ed7fc2589d7a94 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll @@ -1392,7 +1392,7 @@ define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @add_or(<4 x i32> %v) { ; CHECK-LABEL: @add_or( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1407,7 +1407,7 @@ define <4 x i32> @add_or(<4 x i32> %v) { define <4 x i8> @or_add(<4 x i8> %v) { ; CHECK-LABEL: @or_add( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: ret <4 x i8> [[T3]] ; @@ -1422,7 +1422,7 @@ define <4 x i8> @or_add(<4 x i8> %v) { define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { ; CHECK-LABEL: @or_add_not_enough_masking( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1) ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], ; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> @@ -1439,7 +1439,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { ; CHECK-LABEL: @add_or_2_vars( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] @@ -1453,7 +1453,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { ; CHECK-LABEL: @or_add_2_vars( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], ; CHECK-NEXT: ret <4 x i8> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 7516ea5c0a42cf4..21765f74dad7892 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1455,7 +1455,7 @@ define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @add_or(<4 x i32> %v) { ; CHECK-LABEL: @add_or( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1481,7 +1481,7 @@ define <4 x i32> @add_or_disjoint(<4 x i32> %v) { define <4 x i8> @or_add(<4 x i8> %v) { ; CHECK-LABEL: @or_add( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: ret <4 x i8> [[T3]] ; @@ -1496,7 +1496,7 @@ define <4 x i8> @or_add(<4 x i8> %v) { define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { ; CHECK-LABEL: @or_add_not_enough_masking( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1) ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], ; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> @@ -1513,7 +1513,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { ; CHECK-LABEL: @add_or_2_vars( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] @@ -1527,7 +1527,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { ; CHECK-LABEL: @or_add_2_vars( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], ; CHECK-NEXT: ret <4 x i8> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll index 4d8481dd759ea62..30a4546790293f0 100644 --- a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll @@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_lshr_and_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_signbit_lshr_and_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_signbit_lshr_and_eq( -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <4 x i32> splat (i32 -2147483648), [[Y:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[LSHR]], [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll index c97731777283d1c..e43b31ad24fe1ea 100644 --- a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll @@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_shl_and_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_signbit_shl_and_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_signbit_shl_and_eq( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> splat (i32 -2147483648), [[Y:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll b/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll index 69f50738af62101..8ab79494f962c31 100644 --- a/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll +++ b/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll @@ -23,7 +23,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.smul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = mul <2 x i8> %x, %y diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll index 513fb69ab7463ea..8015af59f4f8192 100644 --- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll @@ -173,7 +173,7 @@ define i1 @positive_with_extra_and_logical(i32 %arg, i1 %z) { define <2 x i1> @positive_vec_splat(<2 x i32> %arg) { ; CHECK-LABEL: @positive_vec_splat( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <2 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <2 x i32> %arg, @@ -185,7 +185,7 @@ define <2 x i1> @positive_vec_splat(<2 x i32> %arg) { define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) { ; CHECK-LABEL: @positive_vec_nonsplat( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[ARG]], ; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]] @@ -200,7 +200,7 @@ define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) { define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison0( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -212,7 +212,7 @@ define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison1( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -224,7 +224,7 @@ define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison2( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -236,7 +236,7 @@ define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison3( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -248,7 +248,7 @@ define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison4( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -260,7 +260,7 @@ define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison5( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -272,7 +272,7 @@ define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison6(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison6( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, diff --git a/llvm/test/Transforms/InstCombine/signext.ll b/llvm/test/Transforms/InstCombine/signext.ll index 765a0470c2e483a..f72cf29d2fcc7b3 100644 --- a/llvm/test/Transforms/InstCombine/signext.ll +++ b/llvm/test/Transforms/InstCombine/signext.ll @@ -34,8 +34,8 @@ define i32 @sextinreg_extra_use(i32 %x) { define <2 x i32> @sextinreg_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -58,8 +58,8 @@ define i32 @sextinreg_alt(i32 %x) { define <2 x i32> @sextinreg_alt_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg_alt_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -119,8 +119,8 @@ define i32 @sextinreg2(i32 %x) { define <2 x i32> @sextinreg2_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg2_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 24) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -181,7 +181,7 @@ define i32 @ashr(i32 %x) { define <2 x i32> @ashr_splat(<2 x i32> %x) { ; CHECK-LABEL: @ashr_splat( -; CHECK-NEXT: [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %shr = lshr <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll index e7505721cad604e..b82e33cd2da83ec 100644 --- a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll +++ b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll @@ -121,7 +121,7 @@ define i32 @n7(i16 %x) { define <2 x i32> @t8(<2 x i16> %x) { ; CHECK-LABEL: @t8( ; CHECK-NEXT: [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> @@ -133,7 +133,7 @@ define <2 x i32> @t8(<2 x i16> %x) { define <2 x i32> @t9(<2 x i16> %x) { ; CHECK-LABEL: @t9( ; CHECK-NEXT: [[I1:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> @@ -146,7 +146,7 @@ define <2 x i32> @t9(<2 x i16> %x) { define <2 x i32> @t10_undef(<2 x i16> %x) { ; CHECK-LABEL: @t10_undef( ; CHECK-NEXT: [[I0:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[I1:%.*]] = shl nuw <2 x i32> [[I0]], +; CHECK-NEXT: [[I1:%.*]] = shl nuw <2 x i32> [[I0]], splat (i32 16) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index 65faf0974b503f1..ad88287f1d99e49 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -158,12 +158,14 @@ entry: define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() { ; CHECK-LABEL: define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret { <2 x float> } { <2 x float> } +; CHECK-NEXT: ret { <2 x float> } { <2 x float> splat (float 0xFFF0000000000000) } ; entry: ret { <2 x float>} { <2 x float> } } +; UTC_ARGS: --disable +; FileCheck does not like the nested square brackets. define nofpclass(pinf) [ 1 x [ 1 x float ]] @ret_nofpclass_nested_array_ty_pinf__ninf() { ; CHECK-LABEL: @ret_nofpclass_nested_array_ty_pinf__ninf() { ; CHECK-NEXT: entry: @@ -172,6 +174,7 @@ define nofpclass(pinf) [ 1 x [ 1 x float ]] @ret_nofpclass_nested_array_ty_pinf_ entry: ret [ 1 x [ 1 x float ]] [[ 1 x float ] [float 0xFFF0000000000000]] } +; UTC_ARGS: --enable define nofpclass(pzero) { float, float } @ret_nofpclass_multiple_elems_struct_ty_pzero__nzero() { ; CHECK-LABEL: define nofpclass(pzero) { float, float } @ret_nofpclass_multiple_elems_struct_ty_pzero__nzero() { diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll index 51eff39cd900e29..71725ff2f3d0e47 100644 --- a/llvm/test/Transforms/InstCombine/sitofp.ll +++ b/llvm/test/Transforms/InstCombine/sitofp.ll @@ -381,7 +381,7 @@ define i12 @u32_half_u12(i32 %x) { define <2 x i1> @i8_vec_sitofp_test1(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ult <2 x double> %B, @@ -390,7 +390,7 @@ define <2 x i1> @i8_vec_sitofp_test1(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test2(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ugt <2 x double> %B, @@ -399,7 +399,7 @@ define <2 x i1> @i8_vec_sitofp_test2(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test3(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ule <2 x double> %B, @@ -408,7 +408,7 @@ define <2 x i1> @i8_vec_sitofp_test3(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test4(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test4( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = sitofp <2 x i8> %A to <2 x double> diff --git a/llvm/test/Transforms/InstCombine/smin-icmp.ll b/llvm/test/Transforms/InstCombine/smin-icmp.ll index d1283d8afc0a74d..f05c1698f1a980b 100644 --- a/llvm/test/Transforms/InstCombine/smin-icmp.ll +++ b/llvm/test/Transforms/InstCombine/smin-icmp.ll @@ -911,7 +911,7 @@ define void @eq_smin_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], [[X]] ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) ; CHECK-NEXT: [[CMP4:%.*]] = icmp sge <2 x i32> [[Y]], [[X]] ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) @@ -956,24 +956,24 @@ define void @eq_smin_v2i32(<2 x i32> %x, <2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 == C2 define void @eq_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @eq_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 10)) +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1004,21 +1004,21 @@ define void @eq_smin_v2i32_constant(<2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 < C2 define void @slt_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @slt_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 5)) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %cond = call <2 x i32> @llvm.smin.v2i32(<2 x i32> , <2 x i32> %y) @@ -1049,23 +1049,23 @@ define void @slt_smin_v2i32_constant(<2 x i32> %y) { define void @sle_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sle_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1096,26 +1096,26 @@ define void @sle_smin_v2i32_constant(<2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 > C2 define void @sgt_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sgt_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 15)) +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1147,25 +1147,25 @@ define void @sgt_smin_v2i32_constant(<2 x i32> %y) { define void @sge_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sge_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1197,25 +1197,25 @@ define void @sge_smin_v2i32_constant(<2 x i32> %y) { define void @unknown_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @unknown_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll index f72fe5a6a5817bb..0f4db3b3a65ae72 100644 --- a/llvm/test/Transforms/InstCombine/sqrt.ll +++ b/llvm/test/Transforms/InstCombine/sqrt.ll @@ -192,7 +192,7 @@ define double @sqrt_exp_intr_and_libcall_2(double %x) { define <2 x float> @sqrt_exp_vec(<2 x float> %x) { ; CHECK-LABEL: @sqrt_exp_vec( -; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], +; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 5.000000e-01) ; CHECK-NEXT: [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]]) ; CHECK-NEXT: ret <2 x float> [[E]] ; diff --git a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll index 70fd4eefe74f5dd..3607b191664963b 100644 --- a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll @@ -53,7 +53,7 @@ define { i8, i1 } @no_fold_on_constant_sub_overflow(i8 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, @@ -64,7 +64,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, @@ -75,7 +75,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll index e21ca605fc5af53..9774f6db555440e 100644 --- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll @@ -130,7 +130,7 @@ define i32 @neg_or_ashr_i32_commute(i32 %x0) { define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nsw <4 x i32> %y, %x @@ -142,7 +142,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nuw nsw <4 x i32> %y, %x @@ -166,7 +166,7 @@ define <4 x i32> @neg_or_ashr_i32_vec(<4 x i32> %x) { define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nsw <4 x i32> %y, %x @@ -177,7 +177,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @neg_or_ashr_i32_vec_commute(<4 x i32> %x0) { ; CHECK-LABEL: @neg_or_ashr_i32_vec_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> , [[X0:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer ; CHECK-NEXT: [[SHR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHR]] @@ -283,7 +283,7 @@ define i32 @neg_or_extra_use_ashr_i32(i32 %x, ptr %p) { define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef1( ; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> , [[X:%.*]] -; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], +; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], splat (i32 31) ; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; @@ -295,7 +295,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) { define <4 x i32> @sub_ashr_or_i32_vec_undef2(<4 x i32> %x) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef2( -; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> , [[X:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> splat (i32 255), [[X:%.*]] ; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], ; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] diff --git a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll index 33c02d77c45b904..1010aab7cbb0b0a 100644 --- a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll @@ -47,7 +47,7 @@ define <4 x i32> @neg_or_lshr_i32_vec(<4 x i32> %x) { define <4 x i32> @neg_or_lshr_i32_vec_commute(<4 x i32> %x0) { ; CHECK-LABEL: @neg_or_lshr_i32_vec_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> , [[X0:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer ; CHECK-NEXT: [[SHR:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHR]] diff --git a/llvm/test/Transforms/InstCombine/sub-not.ll b/llvm/test/Transforms/InstCombine/sub-not.ll index 5053319162f0d2e..9fa55a6896126a6 100644 --- a/llvm/test/Transforms/InstCombine/sub-not.ll +++ b/llvm/test/Transforms/InstCombine/sub-not.ll @@ -29,7 +29,7 @@ define i8 @sub_not_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -64,7 +64,7 @@ define i8 @dec_sub_extra_use(i8 %x, i8 %y) { define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @dec_sub_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -99,7 +99,7 @@ define i8 @sub_inc_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_inc_vec( -; CHECK-NEXT: [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[S_NEG]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -134,7 +134,7 @@ define i8 @sub_dec_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_dec_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll index 60607041ad2f905..3bbb9b931e4331e 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll @@ -572,7 +572,7 @@ define i4 @negate_xor(i4 %x) { define <2 x i4> @negate_xor_vec(<2 x i4> %x) { ; CHECK-LABEL: @negate_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[O_NEG]] ; %o = xor <2 x i4> %x, @@ -623,8 +623,8 @@ define i8 @negate_shl_not_uses(i8 %x, i8 %y) { define <2 x i4> @negate_mul_not_uses_vec(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_mul_not_uses_vec( -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 1) +; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], splat (i4 -1) ; CHECK-NEXT: call void @use_v2i4(<2 x i4> [[O]]) ; CHECK-NEXT: [[S_NEG:%.*]] = mul <2 x i4> [[O_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[S_NEG]] diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index b19eae4d8f9a41c..bffdc6d6cd497bf 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -596,7 +596,7 @@ define i4 @negate_xor(i4 %x) { define <2 x i4> @negate_xor_vec(<2 x i4> %x) { ; CHECK-LABEL: @negate_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[O_NEG]] ; %o = xor <2 x i4> %x, @@ -647,8 +647,8 @@ define i8 @negate_shl_not_uses(i8 %x, i8 %y) { define <2 x i4> @negate_mul_not_uses_vec(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_mul_not_uses_vec( -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 1) +; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], splat (i4 -1) ; CHECK-NEXT: call void @use_v2i4(<2 x i4> [[O]]) ; CHECK-NEXT: [[S_NEG:%.*]] = mul <2 x i4> [[O_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[S_NEG]] @@ -1458,8 +1458,8 @@ if.end: define <1 x i64> @PR56601(<1 x i64> %x, <1 x i64> %y) { ; CHECK-LABEL: @PR56601( -; CHECK-NEXT: [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], -; CHECK-NEXT: [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], splat (i64 42) +; CHECK-NEXT: [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], splat (i64 12) ; CHECK-NEXT: [[A1:%.*]] = add <1 x i64> [[M1]], ; CHECK-NEXT: [[A2:%.*]] = add <1 x i64> [[M2]], ; CHECK-NEXT: [[R:%.*]] = sub <1 x i64> [[A1]], [[A2]] diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll index b4add9698b16091..a4135e0b5145321 100644 --- a/llvm/test/Transforms/InstCombine/sub-xor.ll +++ b/llvm/test/Transforms/InstCombine/sub-xor.ll @@ -16,8 +16,8 @@ define i32 @low_mask_nsw_nuw(i32 %x) { define <2 x i32> @low_mask_nsw_nuw_vec(<2 x i32> %x) { ; CHECK-LABEL: @low_mask_nsw_nuw_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) +; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i32> [[AND]], splat (i32 63) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %and = and <2 x i32> %x, @@ -73,8 +73,8 @@ define i8 @arbitrary_mask_sub_nuw_high_bit_dont_care_i8(i8 %x) { define <2 x i5> @arbitrary_mask_sub_v2i5(<2 x i5> %x) { ; CHECK-LABEL: @arbitrary_mask_sub_v2i5( -; CHECK-NEXT: [[A:%.*]] = and <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[M:%.*]] = sub nuw nsw <2 x i5> , [[A]] +; CHECK-NEXT: [[A:%.*]] = and <2 x i5> [[X:%.*]], splat (i5 -8) +; CHECK-NEXT: [[M:%.*]] = sub nuw nsw <2 x i5> splat (i5 -6), [[A]] ; CHECK-NEXT: ret <2 x i5> [[M]] ; %a = and <2 x i5> %x, ; 0b11000 @@ -135,8 +135,8 @@ define i32 @xor_add_extra_use(i32 %x) { define <2 x i8> @xor_add_splat(<2 x i8> %x) { ; CHECK-LABEL: @xor_add_splat( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24) +; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw <2 x i8> splat (i8 105), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %and = and <2 x i8> %x, @@ -147,9 +147,9 @@ define <2 x i8> @xor_add_splat(<2 x i8> %x) { define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) { ; CHECK-LABEL: @xor_add_splat_undef( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[AND]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[XOR]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[XOR]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %and = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index ff3f046607ec3a6..b1313007d509fc0 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -320,7 +320,7 @@ define i32 @test13(i32 %A) { define <2 x i32> @test12vec(<2 x i32> %A) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[B_NEG]] ; %B = ashr <2 x i32> %A, @@ -330,7 +330,7 @@ define <2 x i32> @test12vec(<2 x i32> %A) { define <2 x i32> @test13vec(<2 x i32> %A) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[B_NEG]] ; %B = lshr <2 x i32> %A, @@ -619,7 +619,7 @@ define <2 x i32> @test27vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @test27vecsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test27vecsplat( -; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3) ; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; @@ -663,7 +663,7 @@ define <2 x i32> @test27commutedvec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @test27commutedvecsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test27commutedvecsplat( -; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3) ; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; @@ -749,7 +749,7 @@ define <2 x i64> @test36(<2 x i64> %A) { define <2 x i32> @test37(<2 x i32> %A) { ; CHECK-LABEL: @test37( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV_NEG]] ; @@ -832,7 +832,7 @@ define i32 @test44(i32 %x) { define <2 x i32> @test44vec(<2 x i32> %x) { ; CHECK-LABEL: @test44vec( -; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -32768) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %sub = sub nsw <2 x i32> %x, @@ -850,7 +850,7 @@ define @test44scalablevec( %x) { define <2 x i16> @test44vecminval(<2 x i16> %x) { ; CHECK-LABEL: @test44vecminval( -; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], splat (i16 -32768) ; CHECK-NEXT: ret <2 x i16> [[SUB]] ; %sub = sub nsw <2 x i16> %x, @@ -1001,7 +1001,7 @@ define i32 @test54(i1 %C) { define <2 x i32> @test54vec(i1 %C) { ; CHECK-LABEL: @test54vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 -877), <2 x i32> splat (i32 113) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1048,7 +1048,7 @@ define <2 x i32> @test55vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A_NEG:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A_NEG:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A_NEG]] ; entry: @@ -1228,8 +1228,8 @@ define i32 @test62(i32 %A) { define <2 x i32> @test62vec(<2 x i32> %A) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> splat (i32 2), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = sub <2 x i32> , %A @@ -1250,7 +1250,7 @@ define i32 @test63(i32 %A) { define <2 x i32> @test63vec(<2 x i32> %A) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %B = sub <2 x i32> , %A @@ -1314,8 +1314,8 @@ define i32 @test67(i32 %x) { ; Check splat vectors too define <2 x i32> @test68(<2 x i32> %x) { ; CHECK-LABEL: @test68( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255)) +; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, @@ -1329,7 +1329,7 @@ define <2 x i32> @test68(<2 x i32> %x) { define <2 x i32> @test69(<2 x i32> %x) { ; CHECK-LABEL: @test69( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, @@ -1367,7 +1367,7 @@ define i32 @test71(i32 %A, i32 %B) { ; Check (X | Y) - Y --> X & ~Y where X and Y are vectors define <2 x i32> @test72(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test72( -; CHECK-NEXT: [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 -1) ; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], [[B_NOT]] ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -1552,10 +1552,10 @@ define i8 @sub_not_mask_lowbits(i8 %x) { define <2 x i8> @sub_mask_lowbits_splat_extra_use(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @sub_mask_lowbits_splat_extra_use( -; CHECK-NEXT: [[A2:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A2:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: store <2 x i8> [[A2]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -11) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a1 = add <2 x i8> %x, ; 0xc0 @@ -2056,7 +2056,7 @@ define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) { define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute2( -; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[A:%.*]] = mul <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) { define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3( -; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X0000]] -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], +; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4) +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x0000 = shl <2 x i8> %x, ; 4 low bits are known zero @@ -2373,8 +2373,8 @@ define i10 @sub_to_and_negative4(i10 %x) { define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector1( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 120) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 -9) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2387,8 +2387,8 @@ define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector2( ; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120) +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> splat (i8 77), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2400,9 +2400,9 @@ define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector3( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> splat (i8 44), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2414,8 +2414,8 @@ define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector4( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll index 4593730b8809f4c..6a9b31f3c040e61 100644 --- a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll +++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll @@ -401,7 +401,7 @@ define <8 x i8> @narrow_zext_ashr_keep_trunc_vector(<8 x i8> %i1, <8 x i8> %i2) ; CHECK-NEXT: [[I1_EXT:%.*]] = sext <8 x i8> [[I1]] to <8 x i32> ; CHECK-NEXT: [[I2_EXT:%.*]] = sext <8 x i8> [[I2]] to <8 x i32> ; CHECK-NEXT: [[SUB:%.*]] = add nsw <8 x i32> [[I1_EXT]], [[I2_EXT]] -; CHECK-NEXT: [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], +; CHECK-NEXT: [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], splat (i32 1) ; CHECK-NEXT: [[T:%.*]] = trunc <8 x i32> [[SHIFT]] to <8 x i8> ; CHECK-NEXT: ret <8 x i8> [[T]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-demand.ll b/llvm/test/Transforms/InstCombine/trunc-demand.ll index 9d7bf589268e2b6..aa8021c059013d1 100644 --- a/llvm/test/Transforms/InstCombine/trunc-demand.ll +++ b/llvm/test/Transforms/InstCombine/trunc-demand.ll @@ -85,8 +85,8 @@ define i6 @trunc_lshr_use2(i8 %x) { define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], splat (i7 5) +; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP2]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[R]] ; %s = lshr <2 x i16> %x, @@ -100,7 +100,7 @@ define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) { define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_vec_splat_exact_mask( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], splat (i7 6) ; CHECK-NEXT: ret <2 x i7> [[TMP2]] ; %s = lshr <2 x i16> %x, @@ -113,9 +113,9 @@ define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) { define <2 x i7> @trunc_lshr_big_shift(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_big_shift( -; CHECK-NEXT: [[S:%.*]] = lshr <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = lshr <2 x i16> [[X:%.*]], splat (i16 7) ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[S]] to <2 x i7> -; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[T]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[T]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[R]] ; %s = lshr <2 x i16> %x, diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index 063006ba5eea8be..33fa2c375f1ecaa 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -24,7 +24,7 @@ define i64 @test1(i64 %a) { define <2 x i64> @test1_vec(<2 x i64> %a) { ; CHECK-LABEL: @test1_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -82,8 +82,8 @@ define i64 @test2(i64 %a) { define <2 x i64> @test2_vec(<2 x i64> %a) { ; CHECK-LABEL: @test2_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -229,8 +229,8 @@ define i32 @trunc_ashr(i32 %X) { define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) { ; CHECK-LABEL: @trunc_ashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %A = zext <2 x i32> %X to <2 x i36> @@ -272,7 +272,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test8_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32) ; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; @@ -358,7 +358,7 @@ define i64 @test11(i32 %A, i32 %B) { define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -422,7 +422,7 @@ define i64 @test12(i32 %A, i32 %B) { define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -486,7 +486,7 @@ define i64 @test13(i32 %A, i32 %B) { define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec( ; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -704,7 +704,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) { define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -754,7 +754,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) { define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -862,7 +862,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) { define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, @@ -891,7 +891,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) { define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index c50a3d06d24b9c3..69ccca562e885cb 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -34,7 +34,7 @@ define <2 x i8> @trunc_shl_trunc(<2 x i64> %a) { define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_uniform( -; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8) ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -120,7 +120,7 @@ define i8 @trunc_ashr_trunc_exact(i64 %a) { define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8) ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index 0956fc655ec9a77..15533346b32ece5 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -25,7 +25,7 @@ define i64 @test1(i64 %a) { define <2 x i64> @test1_vec(<2 x i64> %a) { ; CHECK-LABEL: @test1_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -83,8 +83,8 @@ define i64 @test2(i64 %a) { define <2 x i64> @test2_vec(<2 x i64> %a) { ; CHECK-LABEL: @test2_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -230,8 +230,8 @@ define i32 @trunc_ashr(i32 %X) { define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) { ; CHECK-LABEL: @trunc_ashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %A = zext <2 x i32> %X to <2 x i36> @@ -273,7 +273,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test8_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32) ; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; @@ -359,7 +359,7 @@ define i64 @test11(i32 %A, i32 %B) { define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -423,7 +423,7 @@ define i64 @test12(i32 %A, i32 %B) { define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -487,7 +487,7 @@ define i64 @test13(i32 %A, i32 %B) { define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec( ; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -705,7 +705,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) { define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -755,7 +755,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) { define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -863,7 +863,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) { define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, @@ -892,7 +892,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) { define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll index c0111528e2a4d8e..44bba283c668627 100644 --- a/llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -62,8 +62,8 @@ define i16 @testi32i16i8(i32 %add) { define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { ; CHECK-LABEL: @testv4i32i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> ) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> splat (i32 -128)) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 127)) ; CHECK-NEXT: [[R:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[R]] ; @@ -147,8 +147,8 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) { define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> ) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> splat (i16 -128)) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 127)) ; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw <4 x i16> [[TMP2]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[COND_I]] ; @@ -567,7 +567,7 @@ define i8 @C0zero(i8 %X, i8 %y, i8 %z) { define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @C0zeroV( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -10) ; CHECK-NEXT: [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]] ; CHECK-NEXT: ret <2 x i8> [[F]] ; @@ -581,9 +581,9 @@ define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { define <2 x i8> @C0zeroVu(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @C0zeroVu( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[A]], -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 -10) ; CHECK-NEXT: [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[F]] ; CHECK-NEXT: ret <2 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll index fd5d38bb38ddc79..eb021a0fd2c894e 100644 --- a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll @@ -42,7 +42,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) { define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) { ; CHECK-LABEL: @no_fold_vector_no_overflow( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55)) ; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]] ; %a = add nuw <2 x i8> %x, @@ -53,7 +53,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) { define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) { ; CHECK-LABEL: @no_fold_vector_overflow( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55)) ; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]] ; %a = add nuw <2 x i8> %x, @@ -63,7 +63,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, @@ -74,7 +74,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, @@ -85,7 +85,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, %y @@ -128,7 +128,7 @@ define { i32, i1 } @no_fold_wrapped_add(i32 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_with_disjoint_or_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = or disjoint <2 x i32> %x, @@ -139,8 +139,8 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant_with_or_fail(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant_with_or_fail( -; CHECK-NEXT: [[A:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[A:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 12) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = or <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/uaddo.ll b/llvm/test/Transforms/InstCombine/uaddo.ll index 9b56dce8b45856f..ae7a07ec8000c9e 100644 --- a/llvm/test/Transforms/InstCombine/uaddo.ll +++ b/llvm/test/Transforms/InstCombine/uaddo.ll @@ -18,7 +18,7 @@ define i32 @uaddo_commute1(i32 %x, i32 %y, i32 %z) { define <2 x i32> @uaddo_commute2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { ; CHECK-LABEL: @uaddo_commute2( -; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[X]], [[NOTY]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[Z:%.*]], <2 x i32> [[A]] diff --git a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll index 824ed4f2b439473..546824680ea1313 100644 --- a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll +++ b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll @@ -24,7 +24,7 @@ define i64 @test(i64 %X, i1 %Cond ) { define <2 x i32> @PR34856(<2 x i32> %t0, <2 x i32> %t1) { ; CHECK-LABEL: @PR34856( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], splat (i32 -8) ; CHECK-NEXT: [[DIV1:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV1]] ; diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll index 0d87122660cfa18..c8228057eeb10ac 100644 --- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll +++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll @@ -108,7 +108,7 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 64) ; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -298,7 +298,7 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 33554432) ; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll index 35de89a461fa05c..0c490549c0f4ea7 100644 --- a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll +++ b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll @@ -8,8 +8,8 @@ define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -51,7 +51,7 @@ define <2 x i4> @nonsplat (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @in_constant_varx_mone(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_mone( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %x, ; %x @@ -62,7 +62,7 @@ define <2 x i4> @in_constant_varx_mone(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_14(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %x, ; %x @@ -73,7 +73,7 @@ define <2 x i4> @in_constant_varx_14(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_14_nonsplat(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -97,7 +97,7 @@ define <3 x i4> @in_constant_varx_14_undef(<3 x i4> %x, <3 x i4> %mask) { define <2 x i4> @in_constant_mone_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_mone_vary( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %y, ; %x @@ -108,7 +108,7 @@ define <2 x i4> @in_constant_mone_vary(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary( -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, ; %x @@ -119,7 +119,7 @@ define <2 x i4> @in_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_14_vary_nonsplat(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -150,8 +150,8 @@ declare <2 x i4> @gen4() define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -163,8 +163,8 @@ define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -178,8 +178,8 @@ define <2 x i4> @c_0_0_1 () { ; CHECK-LABEL: @c_0_0_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -193,8 +193,8 @@ define <2 x i4> @c_0_0_1 () { define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -207,8 +207,8 @@ define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @c_1_0_1 (<2 x i4> %x) { ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -222,8 +222,8 @@ define <2 x i4> @c_1_0_1 (<2 x i4> %x) { define <2 x i4> @c_0_1_1 (<2 x i4> %y) { ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -238,8 +238,8 @@ define <2 x i4> @c_1_1_1 () { ; CHECK-LABEL: @c_1_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -253,7 +253,7 @@ define <2 x i4> @c_1_1_1 () { define <2 x i4> @commutativity_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_14_vary( -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, ; %x @@ -273,7 +273,7 @@ declare void @use4(<2 x i4>) define <2 x i4> @n_oneuse_D (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_D( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N0]]) ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -288,7 +288,7 @@ define <2 x i4> @n_oneuse_D (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_A( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N1]]) ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -303,7 +303,7 @@ define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_AD( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N0]]) ; CHECK-NEXT: call void @use4(<2 x i4> [[N1]]) @@ -337,7 +337,7 @@ define <2 x i4> @n_var_mask (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_differenty(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @n_differenty( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll index 17b32670ae9d7b1..a25ee985759b8c0 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll @@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: call void @use2x8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll index 677ef47456c0137..be0b0e0da562fb5 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll @@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll index 457a0e594b63030..0bd34deb41d5fc0 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll @@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: call void @use2x8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll index 94966a1eba3289e..b4865218dbc1438 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll @@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll index 89e065033da19f1..5297b3660f8789e 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll @@ -23,7 +23,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = mul <2 x i8> %x, %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll index 241d9cbcde3382d..f7f343827e57619 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll @@ -22,7 +22,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = udiv <2 x i8> , %x @@ -34,7 +34,7 @@ define <3 x i1> @t2_vec_poison(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @t2_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <3 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <3 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <3 x i1> [[MUL_NOT_OV]] ; %t0 = udiv <3 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll index ab147584d2108fb..1509a42872922af 100644 --- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -40,7 +40,7 @@ define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){ ; vector tests define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) { ; CHECK-LABEL: @usub_sat_C1_C2_splat( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> , <2 x i16> [[A:%.*]]) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> splat (i16 50), <2 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i16> [[COND]] ; %add = sub nuw <2 x i16> , %a diff --git a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll index 262942aa1219b8b..55bad9a57b86341 100644 --- a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll +++ b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll @@ -187,7 +187,7 @@ define i64 @t3_notrunc_redundant_sext(i64 %data, i64 %nbits) { define <2 x i32> @t4_vec(<2 x i64> %data, <2 x i32> %nbits) { ; CHECK-LABEL: @t4_vec( -; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <2 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <2 x i32> splat (i32 64), [[NBITS:%.*]] ; CHECK-NEXT: [[SKIP_HIGH_WIDE:%.*]] = zext nneg <2 x i32> [[SKIP_HIGH]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[DATA:%.*]], [[SKIP_HIGH_WIDE]] ; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll index f78a7055ff2f6c1..644f13f8eee6365 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -641,7 +641,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) { ;; indices. define ptr @PR41624(<2 x ptr> %a) { ; CHECK-LABEL: @PR41624( -; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0 ; CHECK-NEXT: ret ptr [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 1fd7903307cef4f..a4b55e2b6cfec11 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -644,7 +644,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) { ;; indices. define ptr @PR41624(<2 x ptr> %a) { ; CHECK-LABEL: @PR41624( -; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0 ; CHECK-NEXT: ret ptr [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll index c43def83f58aca3..ecd5e49bd6b28e8 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll @@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16) ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll index 84657c9d43896d7..ef093709a98f58c 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll @@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16) ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll index 9f5f957f4944520..925f491d2984102 100644 --- a/llvm/test/Transforms/InstCombine/vec_sext.ll +++ b/llvm/test/Transforms/InstCombine/vec_sext.ll @@ -44,7 +44,7 @@ define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) define <2 x i32> @is_negative_poison_elt(<2 x i32> %a) { ; CHECK-LABEL: @is_negative_poison_elt( -; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[A_LOBIT]] ; %cmp = icmp slt <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll index 0f233fbb4729e6f..80fe4b78fe8a9f6 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll @@ -264,7 +264,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @add_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -281,7 +281,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @div_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -378,7 +378,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { define <1 x i32> @test16a(i32 %ele) { ; CHECK-LABEL: @test16a( -; CHECK-NEXT: ret <1 x i32> +; CHECK-NEXT: ret <1 x i32> splat (i32 2) ; %t0 = insertelement <2 x i32> , i32 %ele, i32 1 %t1 = shl <2 x i32> %t0, @@ -388,7 +388,7 @@ define <1 x i32> @test16a(i32 %ele) { define <4 x i8> @test16b(i8 %ele) { ; CHECK-LABEL: @test16b( -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 2) ; %t0 = insertelement <8 x i8> , i8 %ele, i32 6 %t1 = shl <8 x i8> %t0, @@ -702,7 +702,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { define <4 x i32> @shuffle_17add2(<4 x i32> %v) { ; CHECK-LABEL: @shuffle_17add2( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> @@ -936,7 +936,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) { define <2 x i32> @urem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @urem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -958,7 +958,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) { define <2 x i32> @srem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @srem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -980,7 +980,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) { define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @udiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -1002,7 +1002,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) { define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @sdiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -1703,7 +1703,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fdiv( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1718,7 +1718,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fadd( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: call void @use(<2 x float> [[A]]) ; CHECK-NEXT: [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] @@ -1735,7 +1735,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @splat_assoc_and( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; @@ -1751,7 +1751,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { ; CHECK-LABEL: @splat_assoc_xor( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]] -; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: ret <5 x i32> [[R]] ; %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <5 x i32> zeroinitializer @@ -1765,7 +1765,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_mul( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 39a9db02eef2930..163d9c9557b2391 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -254,7 +254,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @add_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -271,7 +271,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @div_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -368,7 +368,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { define <1 x i32> @test16a(i32 %ele) { ; CHECK-LABEL: @test16a( -; CHECK-NEXT: ret <1 x i32> +; CHECK-NEXT: ret <1 x i32> splat (i32 2) ; %t0 = insertelement <2 x i32> , i32 %ele, i32 1 %t1 = shl <2 x i32> %t0, @@ -378,7 +378,7 @@ define <1 x i32> @test16a(i32 %ele) { define <4 x i8> @test16b(i8 %ele) { ; CHECK-LABEL: @test16b( -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 2) ; %t0 = insertelement <8 x i8> , i8 %ele, i32 6 %t1 = shl <8 x i8> %t0, @@ -696,7 +696,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { define <4 x i32> @shuffle_17add2(<4 x i32> %v) { ; CHECK-LABEL: @shuffle_17add2( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> @@ -930,7 +930,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) { define <2 x i32> @urem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @urem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -952,7 +952,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) { define <2 x i32> @srem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @srem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -974,7 +974,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) { define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @udiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -996,7 +996,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) { define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @sdiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1708,7 +1708,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fdiv( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1723,7 +1723,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fadd( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: call void @use(<2 x float> [[A]]) ; CHECK-NEXT: [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] @@ -1740,7 +1740,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @splat_assoc_and( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; @@ -1756,7 +1756,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { ; CHECK-LABEL: @splat_assoc_xor( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]] -; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: ret <5 x i32> [[R]] ; %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <5 x i32> zeroinitializer @@ -1770,7 +1770,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_mul( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -2390,7 +2390,7 @@ define <2 x i32> @foldselect0(i1 %c) { ; Make sure we do not crash in this case. define <4 x float> @shuf_larger_length_vec_select(<2 x i1> %cond) { ; CHECK-LABEL: @shuf_larger_length_vec_select( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[SEL]], <2 x float> zeroinitializer, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUF]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll b/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll index 116d531050111f7..ea05d05bba9940c 100644 --- a/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll +++ b/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll @@ -1,16 +1,26 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define <8 x i16> @udiv_vec8x16(<8 x i16> %var) { +; CHECK-LABEL: define <8 x i16> @udiv_vec8x16( +; CHECK-SAME: <8 x i16> [[VAR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[VAR]], splat (i16 5) +; CHECK-NEXT: ret <8 x i16> [[TMP0]] +; entry: -; CHECK: lshr <8 x i16> %var, %0 = udiv <8 x i16> %var, ret <8 x i16> %0 } define <4 x i32> @udiv_vec4x32(<4 x i32> %var) { +; CHECK-LABEL: define <4 x i32> @udiv_vec4x32( +; CHECK-SAME: <4 x i32> [[VAR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[VAR]], splat (i32 4) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] +; entry: -; CHECK: lshr <4 x i32> %var, %0 = udiv <4 x i32> %var, ret <4 x i32> %0 } diff --git a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll index a87364600ba308e..5770e2ce72020da 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll @@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1) +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767) ; CHECK-NEXT: ret <2 x i64> [[T]] ; %b = and <2 x i64> %a, @@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) { define void @convert(ptr %dst.addr, <2 x i64> %src) { ; CHECK-LABEL: @convert( ; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32> -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) { define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295) ; CHECK-NEXT: [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; @@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) { define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) { define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) { define <2 x i64> @quxt(<2 x i64> %t) { ; CHECK-LABEL: @quxt( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = shl <2 x i64> %t, @@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) { define <8 x i32> @pr24458(<8 x float> %n) { ; CHECK-LABEL: @pr24458( -; CHECK-NEXT: ret <8 x i32> +; CHECK-NEXT: ret <8 x i32> splat (i32 -1) ; %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll index fd2a4ffdfb70927..56e957d54307898 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts.ll @@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1) +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767) ; CHECK-NEXT: ret <2 x i64> [[T]] ; %b = and <2 x i64> %a, @@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) { define void @convert(ptr %dst.addr, <2 x i64> %src) { ; CHECK-LABEL: @convert( ; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32> -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) { define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295) ; CHECK-NEXT: [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; @@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) { define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) { define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) { define <2 x i64> @quxt(<2 x i64> %t) { ; CHECK-LABEL: @quxt( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = shl <2 x i64> %t, @@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) { define <8 x i32> @pr24458(<8 x float> %n) { ; CHECK-LABEL: @pr24458( -; CHECK-NEXT: ret <8 x i32> +; CHECK-NEXT: ret <8 x i32> splat (i32 -1) ; %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/vector-mul.ll b/llvm/test/Transforms/InstCombine/vector-mul.ll index 4640e19011b01e0..2b66a8b12bfce7a 100644 --- a/llvm/test/Transforms/InstCombine/vector-mul.ll +++ b/llvm/test/Transforms/InstCombine/vector-mul.ll @@ -27,7 +27,7 @@ entry: define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @AddToSelf_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 1) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -38,7 +38,7 @@ entry: define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 2) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -49,7 +49,7 @@ entry: define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -71,7 +71,7 @@ entry: define <4 x i8> @MulTest2_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @MulTest2_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -124,7 +124,7 @@ entry: define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @AddToSelf_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -135,7 +135,7 @@ entry: define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 2) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -146,7 +146,7 @@ entry: define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 3) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -168,7 +168,7 @@ entry: define <4 x i16> @MulTest2_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @MulTest2_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 3) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -221,7 +221,7 @@ entry: define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @AddToSelf_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -232,7 +232,7 @@ entry: define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 2) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -243,7 +243,7 @@ entry: define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 3) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -265,7 +265,7 @@ entry: define <4 x i32> @MulTest2_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @MulTest2_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 3) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -318,7 +318,7 @@ entry: define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @AddToSelf_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 1) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -329,7 +329,7 @@ entry: define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 2) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -340,7 +340,7 @@ entry: define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 3) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -362,7 +362,7 @@ entry: define <4 x i64> @MulTest2_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @MulTest2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 3) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -399,7 +399,7 @@ entry: define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) { ; CHECK-LABEL: @ShiftMulTest1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 12) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -411,7 +411,7 @@ entry: define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) { ; CHECK-LABEL: @ShiftMulTest2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 12) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -423,7 +423,7 @@ entry: define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) { ; CHECK-LABEL: @ShiftMulTest3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 12) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -435,7 +435,7 @@ entry: define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) { ; CHECK-LABEL: @ShiftMulTest4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 12) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll index 65d000835326211..5e141f6aacaf2be 100644 --- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll +++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll @@ -16,7 +16,7 @@ define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) { define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reduce_umax_non_zero_fail( -; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 ; CHECK-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/InstCombine/vector-trunc.ll b/llvm/test/Transforms/InstCombine/vector-trunc.ll index bccb12e66eba194..85d6de4a3977c2e 100644 --- a/llvm/test/Transforms/InstCombine/vector-trunc.ll +++ b/llvm/test/Transforms/InstCombine/vector-trunc.ll @@ -3,9 +3,9 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_nsw( -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, @@ -16,9 +16,9 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { define <4 x i16> @trunc_add_no_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_no_nsw( -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], splat (i32 16) ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, @@ -31,7 +31,7 @@ define <4 x i16> @trunc_add_mixed(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_mixed( ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/vector-udiv.ll b/llvm/test/Transforms/InstCombine/vector-udiv.ll index 0289b7c70cc4fbb..dd4976f12a036ab 100644 --- a/llvm/test/Transforms/InstCombine/vector-udiv.ll +++ b/llvm/test/Transforms/InstCombine/vector-udiv.ll @@ -3,7 +3,7 @@ define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = udiv <4 x i32> %a0, @@ -22,7 +22,7 @@ define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) { ; X udiv C, where C >= signbit define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_negconstsplat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], splat (i32 -4) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -51,7 +51,7 @@ define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) { ; X udiv (C1 << N), where C1 is "1< X >> (N+C2) define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @test_v4i32_shl_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -74,7 +74,7 @@ define <4 x i32> @test_v4i32_shl_const_pow2(<4 x i32> %a0, <4 x i32> %a1) { ; X udiv (zext (C1 << N)), where C1 is "1< X >> (N+C2) define <4 x i32> @test_v4i32_zext_shl_splatconst_pow2(<4 x i32> %a0, <4 x i16> %a1) { ; CHECK-LABEL: @test_v4i32_zext_shl_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], splat (i16 2) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] diff --git a/llvm/test/Transforms/InstCombine/vector-urem.ll b/llvm/test/Transforms/InstCombine/vector-urem.ll index 627789a03ef6ca5..d087bbbe06abf61 100644 --- a/llvm/test/Transforms/InstCombine/vector-urem.ll +++ b/llvm/test/Transforms/InstCombine/vector-urem.ll @@ -3,7 +3,7 @@ define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = urem <4 x i32> %a0, @@ -29,7 +29,7 @@ define <4 x i32> @test_v4i32_const_pow2_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_one(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_one( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -39,7 +39,7 @@ define <4 x i32> @test_v4i32_one(<4 x i32> %a0) { define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_one_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -50,8 +50,8 @@ define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_negconstsplat( ; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], splat (i32 -3) +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], splat (i32 3) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/vector-xor.ll b/llvm/test/Transforms/InstCombine/vector-xor.ll index 13894ef85b5da81..3b37684df9e36a6 100644 --- a/llvm/test/Transforms/InstCombine/vector-xor.ll +++ b/llvm/test/Transforms/InstCombine/vector-xor.ll @@ -33,7 +33,7 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) define <4 x i32> @test_v4i32_xor_bswap_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_bswap_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], splat (i32 -16777216) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -68,7 +68,7 @@ define <4 x i32> @test_v4i32_xor_bswap_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test_v4i32_demorgan_and( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[X:%.*]], [[Y_NOT]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; @@ -82,7 +82,7 @@ define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @test_v4i32_demorgan_or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test_v4i32_demorgan_or( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], [[Y_NOT]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; @@ -120,7 +120,7 @@ define <4 x i32> @test_v4i32_not_ashr_not_poison(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @test_v4i32_not_ashr_negative_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_ashr_negative_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> splat (i32 2), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = ashr <4 x i32> , %a0 @@ -152,7 +152,7 @@ define <4 x i32> @test_v4i32_not_ashr_negative_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_not_lshr_nonnegative_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> splat (i32 -4), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = lshr <4 x i32> , %a0 @@ -184,7 +184,7 @@ define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_not_sub_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_sub_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = sub <4 x i32> , %a0 @@ -216,7 +216,7 @@ define <4 x i32> @test_v4i32_not_sub_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> splat (i32 -2147483645), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = sub <4 x i32> , %a0 @@ -227,7 +227,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_sub_const(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> , [[A0:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = sub <4 x i32> , %a0 @@ -250,7 +250,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_add_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -2147483645) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = add <4 x i32> , %a0 @@ -261,7 +261,7 @@ define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_add_const(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_add_const( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = add <4 x i32> , %a0 diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll index 4e3cb34931d07e6..fa810408730e1bc 100644 --- a/llvm/test/Transforms/InstCombine/with_overflow.ll +++ b/llvm/test/Transforms/InstCombine/with_overflow.ll @@ -616,7 +616,7 @@ declare { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8>, <4 x i8>) define { <4 x i8>, <4 x i1> } @always_sadd_const_vector() nounwind { ; CHECK-LABEL: @always_sadd_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -128), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.sadd.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -624,7 +624,7 @@ define { <4 x i8>, <4 x i1> } @always_sadd_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_uadd_const_vector() nounwind { ; CHECK-LABEL: @always_uadd_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> zeroinitializer, <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> zeroinitializer, <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.uadd.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -632,7 +632,7 @@ define { <4 x i8>, <4 x i1> } @always_uadd_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind { ; CHECK-LABEL: @always_ssub_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 127), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -640,7 +640,7 @@ define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind { ; CHECK-LABEL: @always_usub_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -649,7 +649,7 @@ define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind { ; NOTE: LLVM doesn't (yet) detect the multiplication always results in a overflow define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind { ; CHECK-LABEL: @always_smul_const_vector( -; CHECK-NEXT: [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) +; CHECK-NEXT: [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> splat (i8 127), <4 x i8> splat (i8 3)) ; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } [[X]] ; %x = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) @@ -658,7 +658,7 @@ define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_umul_const_vector() nounwind { ; CHECK-LABEL: @always_umul_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -3), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -781,7 +781,7 @@ define i8 @smul_neg1(i8 %x, ptr %p) { define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_neg1_vec( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -795,7 +795,7 @@ define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) { define <4 x i8> @smul_neg1_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_neg1_vec_poison( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -838,7 +838,7 @@ define i8 @umul_neg1(i8 %x, ptr %p) { define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_neg1_vec( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -852,7 +852,7 @@ define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) { define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_neg1_vec_poison( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -865,8 +865,8 @@ define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) { define <4 x i1> @smul_not_neg1_vec(<4 x i8> %x) { ; CHECK-LABEL: @smul_not_neg1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], splat (i8 -43) +; CHECK-NEXT: [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], splat (i8 -85) ; CHECK-NEXT: ret <4 x i1> [[OV]] ; %m = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> ) @@ -945,8 +945,8 @@ define i8 @umul_256(i8 %x, ptr %p) { define <4 x i8> @umul_4_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_4_vec_poison( -; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 2) +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 63) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -1035,8 +1035,8 @@ define i8 @smul_128(i8 %x, ptr %p) { define <4 x i8> @smul_2_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_2_vec_poison( -; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: [[OV:%.*]] = icmp slt <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/xor-and-or.ll b/llvm/test/Transforms/InstCombine/xor-and-or.ll index feba47912b1dac5..47275ce31070b54 100644 --- a/llvm/test/Transforms/InstCombine/xor-and-or.ll +++ b/llvm/test/Transforms/InstCombine/xor-and-or.ll @@ -52,7 +52,7 @@ define i1 @xor_logic_and_logic_or4(i1 %c, i1 %x, i1 %y) { define <3 x i1> @xor_logic_and_logic_or_vector1(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -65,7 +65,7 @@ define <3 x i1> @xor_logic_and_logic_or_vector1(<3 x i1> %c, <3 x i1> %x, <3 x i define <3 x i1> @xor_logic_and_logic_or_vector2(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector2( ; CHECK-NEXT: [[TMP1:%.*]] = freeze <3 x i1> [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i1> [[TMP2]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -89,7 +89,7 @@ define <3 x i1> @xor_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %x, define <3 x i1> @xor_logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -125,7 +125,7 @@ define i1 @xor_and_logic_or2(i1 %c, i1 %x, i1 %y) { define <2 x i1> @xor_and_logic_or_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @xor_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[TMP1]], <2 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -174,7 +174,7 @@ define i1 @xor_logic_and_or2(i1 %c, i1 %x, i1 %y) { define <2 x i1> @xor_logic_and_or_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[TMP1]], <2 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -215,7 +215,7 @@ define i1 @xor_and_or(i1 %c, i1 %x, i1 %y) { ;; and/or/xor on most backend, do we really need to do this transform? define <4 x i1> @xor_and_or_vector(<4 x i1> %c, <4 x i1> %x, <4 x i1> %y) { ; CHECK-LABEL: @xor_and_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP1]], <4 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <4 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/xor-ashr.ll b/llvm/test/Transforms/InstCombine/xor-ashr.ll index 6f501184b63574d..0c0554adcf12303 100644 --- a/llvm/test/Transforms/InstCombine/xor-ashr.ll +++ b/llvm/test/Transforms/InstCombine/xor-ashr.ll @@ -53,8 +53,8 @@ define i128 @testi128i128(i128 %add) { define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> , <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1) +; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 27), <4 x i8> splat (i8 -28) ; CHECK-NEXT: ret <4 x i8> [[X]] ; %sh = ashr <4 x i16> %add, @@ -65,7 +65,7 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { define <4 x i8> @testv4i16i8_poison(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1) ; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> , <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[X]] ; @@ -94,7 +94,7 @@ define i8 @wrongimm(i16 %add) { define <4 x i32> @vectorpoison(<6 x i32> %0) { ; CHECK-LABEL: @vectorpoison( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], splat (i32 -1) ; CHECK-NEXT: [[SHR:%.*]] = sext <6 x i1> [[ISNOTNEG]] to <6 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i32> [[SHR]], <6 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll index f104cd7fdcada55..0384c1aa184b824 100644 --- a/llvm/test/Transforms/InstCombine/xor-icmps.ll +++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll @@ -113,7 +113,7 @@ define i1 @slt_zero_sgt_minus1(i4 %x, i4 %y) { define <2 x i1> @sgt_minus1_slt_zero_sgt(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @sgt_minus1_slt_zero_sgt( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %i1 = icmp sgt <2 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/xor-of-or.ll b/llvm/test/Transforms/InstCombine/xor-of-or.ll index 4f27303dcec302f..1be3bc33af85b06 100644 --- a/llvm/test/Transforms/InstCombine/xor-of-or.ll +++ b/llvm/test/Transforms/InstCombine/xor-of-or.ll @@ -42,8 +42,8 @@ define i4 @t2(i4 %x) { ; Splat constants are fine too. define <2 x i4> @t3(<2 x i4> %x) { ; CHECK-LABEL: @t3( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[I1]] ; %i0 = or <2 x i4> %x, @@ -55,7 +55,7 @@ define <2 x i4> @t3(<2 x i4> %x) { define <2 x i4> @t4(<2 x i4> %x) { ; CHECK-LABEL: @t4( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[I1]] ; %i0 = or <2 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 8308dc503d4da25..3abaf74285cc050 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -111,7 +111,7 @@ define i1 @test9(i8 %A) { define <2 x i1> @test9vec(<2 x i8> %a) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 89) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = xor <2 x i8> %a, @@ -153,7 +153,7 @@ define i1 @test12(i8 %A) { define <2 x i1> @test12vec(<2 x i8> %a) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = xor <2 x i8> %a, @@ -222,7 +222,7 @@ define i32 @fold_zext_xor_sandwich(i1 %X) { define <2 x i32> @fold_zext_xor_sandwich_vec(<2 x i1> %X) { ; CHECK-LABEL: @fold_zext_xor_sandwich_vec( ; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[Z]], +; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[Q]] ; %Y = xor <2 x i1> %X, @@ -287,7 +287,7 @@ define i32 @test28(i32 %indvar) { define <2 x i32> @test28vec(<2 x i32> %indvar) { ; CHECK-LABEL: @test28vec( -; CHECK-NEXT: [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], +; CHECK-NEXT: [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T214]] ; %t7 = add <2 x i32> %indvar, @@ -307,7 +307,7 @@ define i32 @test28_sub(i32 %indvar) { define <2 x i32> @test28_subvec(<2 x i32> %indvar) { ; CHECK-LABEL: @test28_subvec( -; CHECK-NEXT: [[T214:%.*]] = sub <2 x i32> , [[INDVAR:%.*]] +; CHECK-NEXT: [[T214:%.*]] = sub <2 x i32> splat (i32 1), [[INDVAR:%.*]] ; CHECK-NEXT: ret <2 x i32> [[T214]] ; %t7 = sub <2 x i32> , %indvar @@ -327,7 +327,7 @@ define i32 @test29(i1 %C) { define <2 x i32> @test29vec(i1 %C) { ; CHECK-LABEL: @test29vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 915), <2 x i32> splat (i32 113) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -374,7 +374,7 @@ define <2 x i32> @test30vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 915), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -759,7 +759,7 @@ define i32 @test45(i32 %x, i32 %y) { ; Check that we work with splat vectors also. define <4 x i32> @test46(<4 x i32> %x) { ; CHECK-LABEL: @test46( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = xor <4 x i32> %x, @@ -803,8 +803,8 @@ define i32 @test48(i32 %x) { define <2 x i32> @test48vec(<2 x i32> %x) { ; CHECK-LABEL: @test48vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a = sub <2 x i32> , %x @@ -829,7 +829,7 @@ define i32 @test49(i32 %x) { define <2 x i32> @test49vec(<2 x i32> %x) { ; CHECK-LABEL: @test49vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]] ; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer) ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -857,8 +857,8 @@ define i32 @test50(i32 %x, i32 %y) { define <2 x i32> @test50vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test50vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1) ; CHECK-NEXT: [[E:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) ; CHECK-NEXT: ret <2 x i32> [[E]] ; @@ -887,8 +887,8 @@ define i32 @test51(i32 %x, i32 %y) { define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test51vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 -3), [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -3) ; CHECK-NEXT: [[E:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) ; CHECK-NEXT: ret <2 x i32> [[E]] ; @@ -941,7 +941,7 @@ define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_or_xor_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z:%.*]], splat (i4 -1) ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -1011,8 +1011,8 @@ define i8 @not_shl(i8 %x) { define <2 x i8> @not_shl_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_shl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) +; CHECK-NEXT: [[R:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = shl <2 x i8> %x, @@ -1061,7 +1061,7 @@ define i8 @not_lshr(i8 %x) { define <2 x i8> @not_lshr_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_lshr_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1124,7 +1124,7 @@ define i8 @not_ashr(i8 %x) { define <2 x i8> @not_ashr_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_ashr_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[ISNOTNEG]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1222,7 +1222,7 @@ define i32 @xor_andn_commute4(i32 %pa, i32 %pb) { define <2 x i64> @xor_orn(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @xor_orn( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[Z]] ; %nota = xor <2 x i64> %a, @@ -1350,7 +1350,7 @@ define i32 @ctlz_pow2(i32 %x) { define <2 x i8> @cttz_pow2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @cttz_pow2( -; CHECK-NEXT: [[S:%.*]] = shl nuw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl nuw <2 x i8> splat (i8 1), [[X:%.*]] ; CHECK-NEXT: [[D:%.*]] = udiv exact <2 x i8> [[S]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[D]], i1 true) ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1561,10 +1561,10 @@ entry: define <2 x i32> @select_or_disjoint_xor_vec(<2 x i32> %a, i1 %c) { ; CHECK-LABEL: @select_or_disjoint_xor_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]] -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[OR]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[OR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; entry: @@ -1595,10 +1595,10 @@ entry: define <2 x i32> @select_or_disjoint_or_vec(<2 x i32> %a, i1 %c) { ; CHECK-LABEL: @select_or_disjoint_or_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]] -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/xor2.ll b/llvm/test/Transforms/InstCombine/xor2.ll index 0b4fca76ed0a7f4..641f32c6529e627 100644 --- a/llvm/test/Transforms/InstCombine/xor2.ll +++ b/llvm/test/Transforms/InstCombine/xor2.ll @@ -586,7 +586,7 @@ define <3 x i5> @not_xor_to_or_not_vector(<3 x i5> %a, <3 x i5> %b, <3 x i5> %c) ; CHECK-LABEL: @not_xor_to_or_not_vector( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -601,7 +601,7 @@ define <3 x i5> @not_xor_to_or_not_vector_poison(<3 x i5> %a, <3 x i5> %b, <3 x ; CHECK-LABEL: @not_xor_to_or_not_vector_poison( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -697,7 +697,7 @@ define <3 x i5> @xor_notand_to_or_not_vector(<3 x i5> %a, <3 x i5> %b, <3 x i5> ; CHECK-LABEL: @xor_notand_to_or_not_vector( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[XOR:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[XOR]] ; @@ -712,7 +712,7 @@ define <3 x i5> @xor_notand_to_or_not_vector_poison(<3 x i5> %a, <3 x i5> %b, <3 ; CHECK-LABEL: @xor_notand_to_or_not_vector_poison( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[XOR:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[XOR]] ; diff --git a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll index c9da18d3d88bdb9..271c303664b7203 100644 --- a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll @@ -73,7 +73,7 @@ define i32 @zext_add_scalar(i1 %x) { define <2 x i32> @zext_add_vec_splat(<2 x i1> %x) { ; CHECK-LABEL: @zext_add_vec_splat( -; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 43), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; %zext = zext <2 x i1> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll index 384ff8d2b7a3a3f..97412d6ad5f8de1 100644 --- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll +++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll @@ -28,7 +28,7 @@ define i16 @trunc_ctlz_zext_i16_i32(i16 %x) { define <2 x i8> @trunc_ctlz_zext_v2i8_v2i33(<2 x i8> %x) { ; CHECK-LABEL: @trunc_ctlz_zext_v2i8_v2i33( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 25) ; CHECK-NEXT: ret <2 x i8> [[ZZ]] ; %z = zext <2 x i8> %x to <2 x i33> diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll index 872871cf15b033a..e4d18e939521977 100644 --- a/llvm/test/Transforms/InstCombine/zext.ll +++ b/llvm/test/Transforms/InstCombine/zext.ll @@ -20,7 +20,7 @@ define i64 @test_sext_zext(i16 %A) { define <2 x i64> @test2(<2 x i1> %A) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i1> [[XOR]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ZEXT]] ; @@ -67,7 +67,7 @@ define i64 @fold_xor_zext_sandwich(i1 %a) { define <2 x i64> @fold_xor_zext_sandwich_vec(<2 x i1> %a) { ; CHECK-LABEL: @fold_xor_zext_sandwich_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[ZEXT2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ZEXT2]] ; @@ -194,9 +194,9 @@ define i32 @masked_bit_set(i32 %x, i32 %y) { define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_clear( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %sh1 = shl <2 x i32> , %y @@ -210,7 +210,7 @@ define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_set_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> , [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %x = srem <2 x i32> , %px ; thwart complexity-based canonicalization @@ -471,10 +471,10 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) { define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], +; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[Z]] ; %shl = shl <4 x i32> , %b @@ -550,9 +550,9 @@ define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) { define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[Z:%.*]] = zext nneg <4 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[Z]] ; @@ -576,7 +576,7 @@ define i32 @notneg_zext_wider(i8 %x) { define <2 x i8> @notneg_zext_narrower(<2 x i32> %x) { ; CHECK-LABEL: @notneg_zext_narrower( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll index a296aca65bbf68b..9c364d16badee75 100644 --- a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll +++ b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll @@ -7,12 +7,12 @@ define void @test1() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @bar() -; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]] ; CHECK: cont: ; CHECK-NEXT: ret void ; CHECK: lpad: ; CHECK-NEXT: [[EX:%.*]] = landingpad { ptr, i32 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup ; CHECK-NEXT: resume { ptr, i32 } [[EX]] ; entry: @@ -53,7 +53,7 @@ define i32 @test3(i32 %a, float %b) { define i8 @test4(<8 x i8> %V) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> to <8 x i8>) +; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> splat (double 0x319BEB8FD172E36) to <8 x i8>) ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <8 x i8> [[ADD]], i32 6 ; CHECK-NEXT: ret i8 [[EXTRACT]] ; diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 2e3a60522420380..814439a7b453488 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -286,7 +286,7 @@ define i1 @or_of_icmps0(i32 %b) { define <2 x i1> @or_of_icmps0_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps0_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add <2 x i32> %b, %2 = icmp uge <2 x i32> %1, @@ -308,7 +308,7 @@ define i1 @or_of_icmps1(i32 %b) { define <2 x i1> @or_of_icmps1_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nsw <2 x i32> %b, %2 = icmp sge <2 x i32> %1, @@ -330,7 +330,7 @@ define i1 @or_of_icmps2(i32 %b) { define <2 x i1> @or_of_icmps2_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps2_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add <2 x i32> %b, %2 = icmp ugt <2 x i32> %1, @@ -352,7 +352,7 @@ define i1 @or_of_icmps3(i32 %b) { define <2 x i1> @or_of_icmps3_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps3_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nsw <2 x i32> %b, %2 = icmp sgt <2 x i32> %1, @@ -374,7 +374,7 @@ define i1 @or_of_icmps4(i32 %b) { define <2 x i1> @or_of_icmps4_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps4_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nuw <2 x i32> %b, %2 = icmp uge <2 x i32> %1, @@ -396,7 +396,7 @@ define i1 @or_of_icmps5(i32 %b) { define <2 x i1> @or_of_icmps5_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps5_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nuw <2 x i32> %b, %2 = icmp ugt <2 x i32> %1, @@ -492,7 +492,7 @@ define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) { ; CHECK-LABEL: @and_of_different_cast_icmps_vec( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq <2 x i8> [[I:%.*]], zeroinitializer ; CHECK-NEXT: [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], splat (i16 1) ; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3> ; CHECK-NEXT: [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]] ; CHECK-NEXT: ret <2 x i3> [[AND]] @@ -680,7 +680,7 @@ define i3 @or_xorn_and_commute1(i3 %a, i3 %b) { define <2 x i32> @or_xorn_and_commute2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @or_xorn_and_commute2( -; CHECK-NEXT: [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[B:%.*]], [[NEGA]] ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; @@ -800,7 +800,7 @@ define i8 @lshr_perfect_mask(i8 %x) { define <2 x i8> @lshr_oversized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @lshr_oversized_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = lshr <2 x i8> %x, @@ -821,7 +821,7 @@ define i8 @lshr_undersized_mask(i8 %x) { define <2 x i8> @shl_perfect_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_perfect_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = shl <2 x i8> %x, @@ -841,8 +841,8 @@ define i8 @shl_oversized_mask(i8 %x) { define <2 x i8> @shl_undersized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_undersized_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], +; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], splat (i8 -120) ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %sh = shl <2 x i8> %x, @@ -998,7 +998,7 @@ define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) { define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[T3]] ; %t1 = zext <2 x i1> %b to <2 x i64> @@ -1014,9 +1014,9 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: @shl_or_and3v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i32> [[T1]], splat (i32 16) ; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T2]], [[T3]] -; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 -65535) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t1 = zext <2 x i16> %a to <2 x i32> @@ -1059,7 +1059,7 @@ define i89 @or_add_sub(i89 %x) { define <3 x i8> @or_sub_add(<3 x i8> %x) { ; CHECK-LABEL: @or_sub_add( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %a = add <3 x i8> %x, %s = sub <3 x i8> , %x @@ -1070,7 +1070,7 @@ define <3 x i8> @or_sub_add(<3 x i8> %x) { define <2 x i17> @xor_add_sub(<2 x i17> %x) { ; CHECK-LABEL: @xor_add_sub( -; CHECK-NEXT: ret <2 x i17> +; CHECK-NEXT: ret <2 x i17> splat (i17 -1) ; %a = add <2 x i17> %x, %s = sub <2 x i17> , %x diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll b/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll index 979fb2e84ba9247..530547b79fabeb0 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll @@ -46,7 +46,7 @@ entry: define <16 x i1> @vctp8_16() { ; CHECK-LABEL: @vctp8_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16) @@ -56,7 +56,7 @@ entry: define <16 x i1> @vctp8_100() { ; CHECK-LABEL: @vctp8_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100) @@ -66,7 +66,7 @@ entry: define <16 x i1> @vctp8_m1() { ; CHECK-LABEL: @vctp8_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1) @@ -118,7 +118,7 @@ entry: define <8 x i1> @vctp16_8() { ; CHECK-LABEL: @vctp16_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8) @@ -128,7 +128,7 @@ entry: define <8 x i1> @vctp16_100() { ; CHECK-LABEL: @vctp16_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100) @@ -138,7 +138,7 @@ entry: define <8 x i1> @vctp16_m1() { ; CHECK-LABEL: @vctp16_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1) @@ -180,7 +180,7 @@ entry: define <4 x i1> @vctp32_4() { ; CHECK-LABEL: @vctp32_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4) @@ -190,7 +190,7 @@ entry: define <4 x i1> @vctp32_100() { ; CHECK-LABEL: @vctp32_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100) @@ -200,7 +200,7 @@ entry: define <4 x i1> @vctp32_m1() { ; CHECK-LABEL: @vctp32_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1) @@ -232,7 +232,7 @@ entry: define <2 x i1> @vctp64_2() { ; CHECK-LABEL: @vctp64_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 2) @@ -242,7 +242,7 @@ entry: define <2 x i1> @vctp64_100() { ; CHECK-LABEL: @vctp64_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 100) @@ -252,7 +252,7 @@ entry: define <2 x i1> @vctp64_m1() { ; CHECK-LABEL: @vctp64_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 -1) @@ -260,6 +260,12 @@ entry: } define <4 x float> @poisonc(<4 x float> %a) { +; CHECK-LABEL: @poisonc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR27:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 poison) +; CHECK-NEXT: [[VAR33:%.*]] = select <4 x i1> [[VAR27]], <4 x float> [[A:%.*]], <4 x float> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[VAR33]] +; entry: %new0 = shl i1 0, 1 %last = zext i1 %new0 to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll index 4a879e837f8354a..a904e697cc97577 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -46,7 +46,7 @@ entry: define <16 x i1> @v16i1_16() { ; CHECK-LABEL: @v16i1_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16) @@ -56,7 +56,7 @@ entry: define <16 x i1> @v16i1_100() { ; CHECK-LABEL: @v16i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100) @@ -66,7 +66,7 @@ entry: define <16 x i1> @v16i1_m1() { ; CHECK-LABEL: @v16i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1) @@ -138,7 +138,7 @@ entry: define <8 x i1> @v8i1_8() { ; CHECK-LABEL: @v8i1_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8) @@ -148,7 +148,7 @@ entry: define <8 x i1> @v8i1_100() { ; CHECK-LABEL: @v8i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100) @@ -158,7 +158,7 @@ entry: define <8 x i1> @v8i1_m1() { ; CHECK-LABEL: @v8i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1) @@ -220,7 +220,7 @@ entry: define <4 x i1> @v4i1_4() { ; CHECK-LABEL: @v4i1_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) @@ -230,7 +230,7 @@ entry: define <4 x i1> @v4i1_100() { ; CHECK-LABEL: @v4i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100) @@ -240,7 +240,7 @@ entry: define <4 x i1> @v4i1_m1() { ; CHECK-LABEL: @v4i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1) @@ -293,6 +293,12 @@ entry: define <4 x float> @poisonc(<4 x float> %a, i32 %n) { +; CHECK-LABEL: @poisonc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR27:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 1024) +; CHECK-NEXT: [[VAR33:%.*]] = select <4 x i1> [[VAR27]], <4 x float> [[A:%.*]], <4 x float> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[VAR33]] +; entry: %new0 = shl i1 0, 1 %last = zext i1 %new0 to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll index 9c75b66db50cc79..a21124eaad6c57d 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll @@ -4,7 +4,7 @@ define <1 x i64> @test1() { ; CHECK-LABEL: @test1( -; CHECK-NEXT: ret <1 x i64> +; CHECK-NEXT: ret <1 x i64> splat (i64 63) ; %A = bitcast i64 63 to <1 x i64> ret <1 x i64> %A diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll index a2ed8cd6178d86f..b51f061d7918c3b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -57,7 +57,7 @@ define float @nan_f64_trunc() { define <3 x half> @nan_v3f64_trunc() { ; CHECK-LABEL: @nan_v3f64_trunc( -; CHECK-NEXT: ret <3 x half> +; CHECK-NEXT: ret <3 x half> splat (half 0xH7E00) ; %f = fptrunc <3 x double> to <3 x half> ret <3 x half> %f diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll index deeb238bdd0e4e0..fde98c43e7687d8 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll @@ -61,7 +61,7 @@ define <2 x i8> @test_uadd_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_uadd_vector_sat(<2 x i8> %a) { ; CHECK-LABEL: @test_uadd_vector_sat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -77,7 +77,7 @@ define <2 x i8> @test_sadd_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) { ; CHECK-LABEL: @test_sadd_vector_sat_pos( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -85,7 +85,7 @@ define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) { define <2 x i8> @test_sadd_vector_sat_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_sadd_vector_sat_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -157,7 +157,7 @@ define <2 x i8> @test_ssub_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) { ; CHECK-LABEL: @test_ssub_vector_sat_pos( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -165,7 +165,7 @@ define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) { define <2 x i8> @test_ssub_vector_sat_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_ssub_vector_sat_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -271,7 +271,7 @@ define <2 x i8> @test_ssub_vector_both_undef_splat() { define <2 x i8> @test_uadd_vector_op2_undef_splat() { ; CHECK-LABEL: @test_uadd_vector_op2_undef_splat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> undef) ret <2 x i8> %x @@ -279,7 +279,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_splat() { define <2 x i8> @test_sadd_vector_op1_undef_splat() { ; CHECK-LABEL: @test_sadd_vector_op1_undef_splat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> ) ret <2 x i8> %x @@ -311,7 +311,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_mix1() { define <2 x i8> @test_uadd_vector_op2_undef_mix2() { ; CHECK-LABEL: @test_uadd_vector_op2_undef_mix2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -327,7 +327,7 @@ define <2 x i8> @test_sadd_vector_op1_undef_mix1() { define <2 x i8> @test_sadd_vector_op1_undef_mix2() { ; CHECK-LABEL: @test_sadd_vector_op1_undef_mix2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll index 2be619828e9ea1a..4aed1c5dd7d2a18 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll @@ -64,7 +64,7 @@ define <2 x ptr> @constant_undef_index() { define <2 x ptr> @constant_inbounds() { ; CHECK-LABEL: define <2 x ptr> @constant_inbounds() { -; CHECK-NEXT: ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> splat (i64 1)) ; %gep = getelementptr i8, ptr @g, <2 x i64> ret <2 x ptr> %gep diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll index be93d6f9d8be31b..c37dbd6d3a35079 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll @@ -28,7 +28,7 @@ define @vscale_version() { ; The non-scalable version should be optimized as normal. ; CHECK-LABEL: define <8 x i1> @fixed_length_version() { -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) define <8 x i1> @fixed_length_version() { %splatter = insertelement <8 x i1> poison, i1 true, i32 0 %foo = shufflevector <8 x i1> %splatter, diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll index a2f50a637531a93..b575e656f552f12 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll @@ -28,7 +28,7 @@ define @vscale_version() { ; The non-scalable version should be optimized as normal. ; CHECK-LABEL: define <8 x i1> @fixed_length_version() { -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) define <8 x i1> @fixed_length_version() { %splatter = insertelement <8 x i1> undef, i1 true, i32 0 %foo = shufflevector <8 x i1> %splatter, diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index 4153872c0adc239..97390ceaad396f6 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -64,7 +64,7 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], splat (i82 1) ; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) ; CHECK-NEXT: ret <3 x i82> [[ABS]] ; @@ -121,7 +121,7 @@ define i1 @abs_nsw_must_be_positive(i32 %x) { define <4 x i1> @abs_nsw_must_be_positive_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_nsw_must_be_positive_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) %c2 = icmp sge <4 x i32> %abs, zeroinitializer @@ -163,7 +163,7 @@ define i1 @abs_known_positive_input_compare(i31 %x) { define <4 x i1> @abs_known_positive_input_compare_vec(<4 x i31> %x) { ; CHECK-LABEL: @abs_known_positive_input_compare_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %zext = zext <4 x i31> %x to <4 x i32> %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %zext, i1 false) @@ -183,7 +183,7 @@ define i1 @abs_known_not_int_min(i32 %x) { define <4 x i1> @abs_known_not_int_min_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_known_not_int_min_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %or = or <4 x i32> %x, %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %or, i1 false) diff --git a/llvm/test/Transforms/InstSimplify/add_vp.ll b/llvm/test/Transforms/InstSimplify/add_vp.ll index 327d45c4f24cc57..320f3fbb303797a 100644 --- a/llvm/test/Transforms/InstSimplify/add_vp.ll +++ b/llvm/test/Transforms/InstSimplify/add_vp.ll @@ -10,7 +10,7 @@ declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) ; Constant folding should just work. define <2 x i32> @constant_vp_add(<2 x i1> %mask, i32 %evl) { ; CHECK-LABEL: @constant_vp_add( -; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> , <2 x i32> , <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> splat (i32 3), <2 x i32> splat (i32 7), <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[Q]] ; %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> , <2 x i32> , <2 x i1> %mask, i32 %evl) diff --git a/llvm/test/Transforms/InstSimplify/addsub.ll b/llvm/test/Transforms/InstSimplify/addsub.ll index dcfdb4d2a5404b0..aa13b3194c464c7 100644 --- a/llvm/test/Transforms/InstSimplify/addsub.ll +++ b/llvm/test/Transforms/InstSimplify/addsub.ll @@ -12,7 +12,7 @@ define i1 @test1(i1 %a) { define <2 x i1> @test2(<2 x i1> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %b = xor <2 x i1> %a, %res = sub <2 x i1> %a, %b diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll index 2e75a0d2c98abf5..3f1672d66abf0d1 100644 --- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll +++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll @@ -68,11 +68,8 @@ define <4 x i32> @test8(<1 x i64> %y) { } define <4 x i32> @test9(<1 x i64> %y) { -; CONSTVEC-LABEL: @test9( -; CONSTVEC-NEXT: ret <4 x i32> -; -; CONSTSPLAT-LABEL: @test9( -; CONSTSPLAT-NEXT: ret <4 x i32> splat (i32 -1) +; CHECK-LABEL: @test9( +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; %c = bitcast <2 x i64> to <4 x i32> ret <4 x i32> %c @@ -80,7 +77,7 @@ define <4 x i32> @test9(<1 x i64> %y) { define <1 x i1> @test10() { ; CONSTVEC-LABEL: @test10( -; CONSTVEC-NEXT: [[RET:%.*]] = icmp eq <1 x i64> to i64)>, zeroinitializer +; CONSTVEC-NEXT: [[RET:%.*]] = icmp eq <1 x i64> splat (double 0xFFFFFFFFFFFFFFFF) to i64)>, zeroinitializer ; CONSTVEC-NEXT: ret <1 x i1> [[RET]] ; ; CONSTSPLAT-LABEL: @test10( @@ -93,11 +90,8 @@ define <1 x i1> @test10() { ; from MultiSource/Benchmarks/Bullet define <2 x float> @foo() { -; CONSTVEC-LABEL: @foo( -; CONSTVEC-NEXT: ret <2 x float> -; -; CONSTSPLAT-LABEL: @foo( -; CONSTSPLAT-NEXT: ret <2 x float> splat (float 0xFFFFFFFFE0000000) +; CHECK-LABEL: @foo( +; CHECK-NEXT: ret <2 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i64 -1 to <2 x float> ret <2 x float> %cast @@ -105,22 +99,16 @@ define <2 x float> @foo() { define <2 x double> @foo2() { -; CONSTVEC-LABEL: @foo2( -; CONSTVEC-NEXT: ret <2 x double> -; -; CONSTSPLAT-LABEL: @foo2( -; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) +; CHECK-LABEL: @foo2( +; CHECK-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast i128 -1 to <2 x double> ret <2 x double> %cast } define <1 x float> @foo3() { -; CONSTVEC-LABEL: @foo3( -; CONSTVEC-NEXT: ret <1 x float> -; -; CONSTSPLAT-LABEL: @foo3( -; CONSTSPLAT-NEXT: ret <1 x float> splat (float 0xFFFFFFFFE0000000) +; CHECK-LABEL: @foo3( +; CHECK-NEXT: ret <1 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i32 -1 to <1 x float> ret <1 x float> %cast @@ -143,11 +131,8 @@ define double @foo5() { } define <2 x double> @foo6() { -; CONSTVEC-LABEL: @foo6( -; CONSTVEC-NEXT: ret <2 x double> -; -; CONSTSPLAT-LABEL: @foo6( -; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) +; CHECK-LABEL: @foo6( +; CHECK-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast <4 x i32> to <2 x double> ret <2 x double> %cast @@ -299,7 +284,7 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() { define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() { ; CONSTVEC-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int( -; CONSTVEC-NEXT: ret <1 x i32> +; CONSTVEC-NEXT: ret <1 x i32> splat (i32 1065353216) ; ; CONSTSPLAT-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int( ; CONSTSPLAT-NEXT: ret <1 x i32> bitcast (<1 x float> splat (float 1.000000e+00) to <1 x i32>) diff --git a/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll b/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll index d872a5bbbccfd41..ba42e0f21d07770 100644 --- a/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll +++ b/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll @@ -73,7 +73,7 @@ define <2 x i1> @reverse_false_v2i1() { } ; CHECK-LABEL: @reverse_true_v2i1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) define <2 x i1> @reverse_true_v2i1() { %x = call <2 x i1> @llvm.bitreverse.v2i1(<2 x i1> ) ret <2 x i1> %x diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index c6f6b65f89dc2ba..67d5c4dbfb2e7d8 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -982,7 +982,7 @@ define <2 x i8> @fshr_zero_vec(<2 x i8> %shamt) { define <2 x i7> @fshl_ones_vec(<2 x i7> %shamt) { ; CHECK-LABEL: @fshl_ones_vec( -; CHECK-NEXT: ret <2 x i7> +; CHECK-NEXT: ret <2 x i7> splat (i7 -1) ; %r = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> , <2 x i7> , <2 x i7> %shamt) ret <2 x i7> %r @@ -1421,7 +1421,7 @@ define i32 @ctpop_pow2(i32 %x) { define <3 x i33> @ctpop_signbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_signbit( -; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], splat (i33 32) ; CHECK-NEXT: ret <3 x i33> [[B]] ; %b = lshr <3 x i33> %x, @@ -1433,7 +1433,7 @@ define <3 x i33> @ctpop_signbit(<3 x i33> %x) { define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_notsignbit( -; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], splat (i33 31) ; CHECK-NEXT: [[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> [[B]]) ; CHECK-NEXT: ret <3 x i33> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/canonicalize.ll b/llvm/test/Transforms/InstSimplify/canonicalize.ll index c3877ec0cef8d13..9d2bdd1b853e612 100644 --- a/llvm/test/Transforms/InstSimplify/canonicalize.ll +++ b/llvm/test/Transforms/InstSimplify/canonicalize.ll @@ -27,7 +27,7 @@ define <2 x float> @canonicalize_zero_vector() { define <2 x float> @canonicalize_negzero_vector() { ; CHECK-LABEL: @canonicalize_negzero_vector( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -0.000000e+00) ; %ret = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> ) ret <2 x float> %ret diff --git a/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll b/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll index 8f9c75bae1097d1..20bdbb91ba85750 100644 --- a/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll +++ b/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll @@ -49,7 +49,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_float(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_float_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_float_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x float> %b = bitcast <2 x float> %f to <2 x i32> @@ -72,7 +72,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_mismatch(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> @@ -122,7 +122,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_double(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_double_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x double> %b = bitcast <2 x double> %f to <2 x i64> @@ -132,7 +132,7 @@ define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_double_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x double> %b = bitcast <3 x double> %f to <3 x i64> @@ -182,7 +182,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_half(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_half_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x half> %b = bitcast <2 x half> %f to <2 x i16> @@ -192,7 +192,7 @@ define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_half_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x half> %b = bitcast <3 x half> %f to <3 x i16> diff --git a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll index 169ecc3c2d37e39..f9cad7e4345051b 100644 --- a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll +++ b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll @@ -5,7 +5,7 @@ define <2 x i1> @i32cmp_eq_fixed_zero() { ; CHECK-LABEL: @i32cmp_eq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp eq <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -21,7 +21,7 @@ define @i32cmp_eq_scalable_zero() { define <2 x i1> @i32cmp_eq_fixed_one() { ; CHECK-LABEL: @i32cmp_eq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp eq <2 x i32> , ret <2 x i1> %res @@ -101,7 +101,7 @@ define @i32cmp_ugt_scalable_one() { define <2 x i1> @i32cmp_uge_fixed_zero() { ; CHECK-LABEL: @i32cmp_uge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp uge <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -117,7 +117,7 @@ define @i32cmp_uge_scalable_zero() { define <2 x i1> @i32cmp_uge_fixed_one() { ; CHECK-LABEL: @i32cmp_uge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp uge <2 x i32> , ret <2 x i1> %res @@ -165,7 +165,7 @@ define @i32cmp_ult_scalable_one() { define <2 x i1> @i32cmp_ule_fixed_zero() { ; CHECK-LABEL: @i32cmp_ule_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp ule <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -181,7 +181,7 @@ define @i32cmp_ule_scalable_zero() { define <2 x i1> @i32cmp_ule_fixed_one() { ; CHECK-LABEL: @i32cmp_ule_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp ule <2 x i32> , ret <2 x i1> %res @@ -229,7 +229,7 @@ define @i32cmp_sgt_scalable_one() { define <2 x i1> @i32cmp_sge_fixed_zero() { ; CHECK-LABEL: @i32cmp_sge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sge <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -245,7 +245,7 @@ define @i32cmp_sge_scalable_zero() { define <2 x i1> @i32cmp_sge_fixed_one() { ; CHECK-LABEL: @i32cmp_sge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sge <2 x i32> , ret <2 x i1> %res @@ -293,7 +293,7 @@ define @i32cmp_slt_scalable_one() { define <2 x i1> @i32cmp_sle_fixed_zero() { ; CHECK-LABEL: @i32cmp_sle_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sle <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -309,7 +309,7 @@ define @i32cmp_sle_scalable_zero() { define <2 x i1> @i32cmp_sle_fixed_one() { ; CHECK-LABEL: @i32cmp_sle_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sle <2 x i32> , ret <2 x i1> %res @@ -357,7 +357,7 @@ define @floatcmp_false_scalable_one() { define <2 x i1> @floatcmp_oeq_fixed_zero() { ; CHECK-LABEL: @floatcmp_oeq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oeq <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -373,7 +373,7 @@ define @floatcmp_oeq_scalable_zero() { define <2 x i1> @floatcmp_oeq_fixed_one() { ; CHECK-LABEL: @floatcmp_oeq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oeq <2 x float> , ret <2 x i1> %res @@ -421,7 +421,7 @@ define @floatcmp_ogt_scalable_one() { define <2 x i1> @floatcmp_oge_fixed_zero() { ; CHECK-LABEL: @floatcmp_oge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oge <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -437,7 +437,7 @@ define @floatcmp_oge_scalable_zero() { define <2 x i1> @floatcmp_oge_fixed_one() { ; CHECK-LABEL: @floatcmp_oge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oge <2 x float> , ret <2 x i1> %res @@ -485,7 +485,7 @@ define @floatcmp_olt_scalable_one() { define <2 x i1> @floatcmp_ole_fixed_zero() { ; CHECK-LABEL: @floatcmp_ole_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ole <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -501,7 +501,7 @@ define @floatcmp_ole_scalable_zero() { define <2 x i1> @floatcmp_ole_fixed_one() { ; CHECK-LABEL: @floatcmp_ole_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ole <2 x float> , ret <2 x i1> %res @@ -549,7 +549,7 @@ define @floatcmp_one_scalable_one() { define <2 x i1> @floatcmp_ord_fixed_zero() { ; CHECK-LABEL: @floatcmp_ord_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ord <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -565,7 +565,7 @@ define @floatcmp_ord_scalable_zero() { define <2 x i1> @floatcmp_ord_fixed_one() { ; CHECK-LABEL: @floatcmp_ord_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ord <2 x float> , ret <2 x i1> %res @@ -581,7 +581,7 @@ define @floatcmp_ord_scalable_one() { define <2 x i1> @floatcmp_ueq_fixed_zero() { ; CHECK-LABEL: @floatcmp_ueq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ueq <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -597,7 +597,7 @@ define @floatcmp_ueq_scalable_zero() { define <2 x i1> @floatcmp_ueq_fixed_one() { ; CHECK-LABEL: @floatcmp_ueq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ueq <2 x float> , ret <2 x i1> %res @@ -645,7 +645,7 @@ define @floatcmp_ugt_scalable_one() { define <2 x i1> @floatcmp_uge_fixed_zero() { ; CHECK-LABEL: @floatcmp_uge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp uge <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -661,7 +661,7 @@ define @floatcmp_uge_scalable_zero() { define <2 x i1> @floatcmp_uge_fixed_one() { ; CHECK-LABEL: @floatcmp_uge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp uge <2 x float> , ret <2 x i1> %res @@ -709,7 +709,7 @@ define @floatcmp_ult_scalable_one() { define <2 x i1> @floatcmp_ule_fixed_zero() { ; CHECK-LABEL: @floatcmp_ule_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ule <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -725,7 +725,7 @@ define @floatcmp_ule_scalable_zero() { define <2 x i1> @floatcmp_ule_fixed_one() { ; CHECK-LABEL: @floatcmp_ule_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ule <2 x float> , ret <2 x i1> %res @@ -805,7 +805,7 @@ define @floatcmp_uno_scalable_one() { define <2 x i1> @floatcmp_true_fixed_zero() { ; CHECK-LABEL: @floatcmp_true_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp true <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -821,7 +821,7 @@ define @floatcmp_true_scalable_zero() { define <2 x i1> @floatcmp_true_fixed_one() { ; CHECK-LABEL: @floatcmp_true_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp true <2 x float> , ret <2 x i1> %res diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 8077f5ad5c67370..21653d800dce2d1 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1287,7 +1287,7 @@ define i1 @mul3(i32 %X, i32 %Y) { define <2 x i1> @mul3v(<2 x i32> %X, <2 x i32> %Y) { ; CHECK-LABEL: @mul3v( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %XX = mul nsw <2 x i32> %X, %X %YY = mul nsw <2 x i32> %Y, %Y @@ -1465,7 +1465,7 @@ define i1 @compare_always_true_slt(i16 %a) { define <2 x i1> @compare_always_true_slt_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_slt_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1485,7 +1485,7 @@ define i1 @compare_always_true_sle(i16 %a) { define <2 x i1> @compare_always_true_sle_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_sle_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1565,7 +1565,7 @@ define i1 @compare_always_true_ne(i16 %a) { define <2 x i1> @compare_always_true_ne_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_ne_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1725,7 +1725,7 @@ define i1 @icmp_ne_const(i32 %a) { define <2 x i1> @icmp_ne_const_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_const_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %b = mul nsw <2 x i32> %a, %c = icmp ne <2 x i32> %b, @@ -1851,7 +1851,7 @@ define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_ule_signmask(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1860,7 +1860,7 @@ define <2 x i1> @icmp_shl_1_ule_signmask(<2 x i8> %V) { define <2 x i1> @icmp_shl_1_ule_signmask_poison(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1869,7 +1869,7 @@ define <2 x i1> @icmp_shl_1_ule_signmask_poison(<2 x i8> %V) { define <2 x i1> @icmp_shl_1_ule_signmask_poison2(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1914,7 +1914,7 @@ define i1 @shl_1_cmp_ne_nonpow2(i32 %x) { define <2 x i1> @shl_1_cmp_ne_nonpow2_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1923,7 +1923,7 @@ define <2 x i1> @shl_1_cmp_ne_nonpow2_splat(<2 x i32> %x) { define <2 x i1> @shl_1_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1941,7 +1941,7 @@ define i1 @shl_pow2_cmp_eq_nonpow2(i32 %x) { define <2 x i1> @shl_pow21_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow21_cmp_ne_nonpow2_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1965,7 +1965,7 @@ define i1 @shl_pow2_cmp_ne_zero(i32 %x) { define <2 x i1> @shl_pow2_cmp_ne_zero_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow2_cmp_ne_zero_splat( -; CHECK-NEXT: [[S:%.*]] = shl <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl <2 x i32> splat (i32 16), [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[S]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1985,7 +1985,7 @@ define i1 @shl_pow2_cmp_eq_zero_nuw(i32 %x) { define <2 x i1> @shl_pow2_cmp_ne_zero_nuw_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow2_cmp_ne_zero_nuw_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl nuw <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -2730,7 +2730,7 @@ define i1 @icmp_nsw_i64(i64 %V) { define <4 x i1> @icmp_nsw_vec(<4 x i32> %V) { ; CHECK-LABEL: @icmp_nsw_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %add5 = add <4 x i32> %V, %add6 = add nsw <4 x i32> %V, @@ -2937,7 +2937,7 @@ define i1 @ctpop_ne_big_bitwidth(i73 %x) { define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctpop_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -2949,7 +2949,7 @@ define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @ctpop_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctpop_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x) @@ -3030,7 +3030,7 @@ define i1 @ctlz_ne_big_bitwidth(i73 %x) { define <2 x i1> @ctlz_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctlz_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -3042,7 +3042,7 @@ define <2 x i1> @ctlz_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @ctlz_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctlz_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> %x) @@ -3123,7 +3123,7 @@ define i1 @cttz_ne_big_bitwidth(i73 %x) { define <2 x i1> @cttz_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @cttz_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -3135,7 +3135,7 @@ define <2 x i1> @cttz_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @cttz_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @cttz_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> %x) diff --git a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll index 92d6cc30d6248ef..34fa05856b22dbd 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll +++ b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll @@ -57,7 +57,7 @@ define i8 @knownbits_allones(i8 %x, i8 %y) { define <2 x i8> @add_vec(<2 x i8> %x) { ; CHECK-LABEL: @add_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %ret = add nuw <2 x i8> %x, ret <2 x i8> %ret diff --git a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll index 3f4a08807a4b41f..1ea9f7fb8e46d00 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll +++ b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll @@ -72,7 +72,7 @@ define i8 @knownbits_negativeorzero(i8 %x, i8 %y) { define <2 x i8> @shl_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %ret = shl nuw <2 x i8> , %x ret <2 x i8> %ret diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll index 48cc8895aebbcaf..92e43654806a4bf 100644 --- a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll @@ -121,7 +121,7 @@ define i64 @ctpop_x_and_negx_nz(i64 %x) { define <2 x i32> @ctpop_shl1_vec(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl1_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %shl = shl <2 x i32> , %x %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shl) @@ -141,7 +141,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_vec(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %shr = lshr <2 x i32> , %x %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shr) @@ -162,7 +162,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> , [[X1]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -176,7 +176,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec_nz( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[AND]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -202,7 +202,7 @@ define <2 x i32> @ctpop_x_and_negx_vec(<2 x i32> %x) { define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SUB]], [[X]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]]) diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll index e2bc121aee45712..6566fa0292a2521 100644 --- a/llvm/test/Transforms/InstSimplify/div.ll +++ b/llvm/test/Transforms/InstSimplify/div.ll @@ -402,8 +402,8 @@ define <2 x i8> @udiv_exact_trailing_zeros(<2 x i8> %x) { define <2 x i8> @udiv_exact_trailing_zeros_eq(<2 x i8> %x) { ; CHECK-LABEL: @udiv_exact_trailing_zeros_eq( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 28) +; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], splat (i8 12) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %o = or <2 x i8> %x, @@ -428,7 +428,7 @@ define i8 @udiv_trailing_zeros(i8 %x) { define <2 x i8> @udiv_exact_trailing_zeros_nonuniform_vector(<2 x i8> %x) { ; CHECK-LABEL: @udiv_exact_trailing_zeros_nonuniform_vector( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/exp10.ll b/llvm/test/Transforms/InstSimplify/exp10.ll index 11617ceec666d04..a546bb1255d8540 100644 --- a/llvm/test/Transforms/InstSimplify/exp10.ll +++ b/llvm/test/Transforms/InstSimplify/exp10.ll @@ -9,8 +9,8 @@ declare @llvm.exp10.nxv2f32() define float @exp10_exp10(float %x) { -; CHECK-LABEL: define float @exp10_exp10 -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define float @exp10_exp10( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call float @llvm.exp10.f32(float [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call float @llvm.exp10.f32(float [[EXP100]]) ; CHECK-NEXT: ret float [[EXP101]] @@ -21,8 +21,8 @@ define float @exp10_exp10(float %x) { } define <2 x float> @exp10_exp10_vector(<2 x float> %x) { -; CHECK-LABEL: define <2 x float> @exp10_exp10_vector -; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-LABEL: define <2 x float> @exp10_exp10_vector( +; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[EXP100]]) ; CHECK-NEXT: ret <2 x float> [[EXP101]] @@ -33,8 +33,8 @@ define <2 x float> @exp10_exp10_vector(<2 x float> %x) { } define float @exp10_exp10_const(float %x) { -; CHECK-LABEL: define float @exp10_exp10_const -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define float @exp10_exp10_const( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[EXP101:%.*]] = call float @llvm.exp10.f32(float 0x7FF0000000000000) ; CHECK-NEXT: ret float [[EXP101]] ; @@ -44,8 +44,8 @@ define float @exp10_exp10_const(float %x) { } define @exp10_exp10_scalable_vector( %x) { -; CHECK-LABEL: define @exp10_exp10_scalable_vector -; CHECK-SAME: ( [[X:%.*]]) { +; CHECK-LABEL: define @exp10_exp10_scalable_vector( +; CHECK-SAME: [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call @llvm.exp10.nxv2f32( [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call @llvm.exp10.nxv2f32( [[EXP100]]) ; CHECK-NEXT: ret [[EXP101]] @@ -101,7 +101,7 @@ define <2 x float> @exp10_undef_vector() { define <2 x float> @exp10_zero_vector() { ; CHECK-LABEL: define <2 x float> @exp10_zero_vector() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> zeroinitializer) ret <2 x float> %ret @@ -118,7 +118,7 @@ define @exp10_zero_scalable_vector() { define <2 x float> @exp10_zero_negzero_vector() { ; CHECK-LABEL: define <2 x float> @exp10_zero_negzero_vector() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) ret <2 x float> %ret @@ -262,7 +262,7 @@ define ppc_fp128 @canonicalize_noncanonical_zero_1_ppcf128() { define <2 x float> @exp10_splat_4() { ; CHECK-LABEL: define <2 x float> @exp10_splat_4() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+04) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) ret <2 x float> %ret @@ -270,7 +270,7 @@ define <2 x float> @exp10_splat_4() { define <2 x float> @exp10_splat_qnan() { ; CHECK-LABEL: define <2 x float> @exp10_splat_qnan() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF8000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) @@ -279,7 +279,7 @@ define <2 x float> @exp10_splat_qnan() { define <2 x float> @exp10_splat_inf() { ; CHECK-LABEL: define <2 x float> @exp10_splat_inf() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF0000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) @@ -288,7 +288,7 @@ define <2 x float> @exp10_splat_inf() { define <2 x float> @exp10_splat_neginf() { ; CHECK-LABEL: define <2 x float> @exp10_splat_neginf() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0xFFF0000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) diff --git a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll index b1d772890aff83c..963953ad2b3bc67 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll @@ -80,7 +80,7 @@ define float @fadd_unary_fnegx(float %x) #0 { define <2 x float> @fadd_binary_fnegx_commute_vec(<2 x float> %x) #0 { ; CHECK-LABEL: @fadd_binary_fnegx_commute_vec( -; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[X]], <2 x float> [[NEGX]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -415,7 +415,7 @@ define double @frem_negzero_by_x(double %x) #0 { define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) #0 { ; CHECK-LABEL: @frem_negzero_by_x_vec_poison( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> , <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/fast-math.ll b/llvm/test/Transforms/InstSimplify/fast-math.ll index 287f30b162f8049..61bb5b976b98bbb 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math.ll @@ -397,7 +397,7 @@ define double @frem_negzero_by_x(double %x) { define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) { ; CHECK-LABEL: @frem_negzero_by_x_vec_poison( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; %r = frem nnan <2 x double> , %x ret <2 x double> %r @@ -469,7 +469,7 @@ define float @fdiv_neg_swapped2(float %f) { define <2 x float> @fdiv_neg_vec_poison_elt(<2 x float> %f) { ; CHECK-LABEL: @fdiv_neg_vec_poison_elt( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -1.000000e+00) ; %neg = fsub <2 x float> , %f %div = fdiv nnan <2 x float> %f, %neg diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index fb59011b91d5bd4..91f9b556d66250b 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -148,7 +148,7 @@ define <2 x float> @fdiv_nnan_nsz_ninf_by_zero_v2f32(<2 x float> %x) { define <2 x float> @fdiv_nnan_nsz_ninf_by_negzero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], splat (float -0.000000e+00) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan nsz <2 x float> %x, diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll index 32ea4cb7cd198da..9a078a8f569dab1 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll @@ -15,7 +15,7 @@ define float @fsub_-0_x(float %a) #0 { define <2 x float> @fsub_-0_x_vec(<2 x float> %a) #0 { ; CHECK-LABEL: @fsub_-0_x_vec( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[RET:%.*]] = fneg <2 x float> [[T1]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -68,8 +68,8 @@ define float @fneg_x(float %a) #0 { define <2 x float> @fsub_-0_-0_x_vec(<2 x float> %a) #0 { ; CHECK-LABEL: @fsub_-0_-0_x_vec( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 7a35f09f03b995b..d3178a103d42cda 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -221,7 +221,7 @@ define float @src_mul_nzero_neg(float nofpclass(inf nan pzero psub pnorm) %f) { define <2 x float> @src_mul_zero_neg(<2 x float> nofpclass(inf nan pzero psub pnorm) %f) { ; CHECK-LABEL: @src_mul_zero_neg( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -0.000000e+00) ; %r = fmul <2 x float> , %f ret <2 x float> %r @@ -372,7 +372,7 @@ define float @fabs_select_positive_constants(i32 %c) { define <2 x float> @fabs_select_positive_constants_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_positive_constants_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %cmp = icmp eq i32 %c, 0 @@ -397,7 +397,7 @@ define float @fabs_select_constant_variable(i32 %c, float %x) { define <2 x float> @fabs_select_constant_variable_vector(i32 %c, <2 x float> %x) { ; CHECK-LABEL: @fabs_select_constant_variable_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> [[X:%.*]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]] ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -423,7 +423,7 @@ define float @fabs_select_neg0_pos0(i32 %c) { define <2 x float> @fabs_select_neg0_pos0_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_neg0_pos0_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> zeroinitializer +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> zeroinitializer ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -449,7 +449,7 @@ define float @fabs_select_neg0_neg1(i32 %c) { define <2 x float> @fabs_select_neg0_neg1_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_neg0_neg1_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> splat (float -1.000000e+00) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -474,7 +474,7 @@ define float @fabs_select_nan_nan(i32 %c) { define <2 x float> @fabs_select_nan_nan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_nan_nan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0x7FF8000000000000), <2 x float> splat (float 0x7FF8000100000000) ; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %cmp = icmp eq i32 %c, 0 @@ -499,7 +499,7 @@ define float @fabs_select_negnan_nan(i32 %c) { define <2 x float> @fabs_select_negnan_nan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_nan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000000000000) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -525,7 +525,7 @@ define float @fabs_select_negnan_negnan(i32 %c) { define <2 x float> @fabs_select_negnan_negnan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_negnan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000100000000) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -551,7 +551,7 @@ define float @fabs_select_negnan_negzero(i32 %c) { define <2 x float> @fabs_select_negnan_negzero_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_negzero_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -577,7 +577,7 @@ define float @fabs_select_negnan_zero(i32 %c) { define <2 x float> @fabs_select_negnan_zero_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_zero_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> zeroinitializer +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> zeroinitializer ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -651,7 +651,7 @@ define float @fabs_sqrt_nnan_fabs(float %a) { define float @fabs_select_positive_constants_vector_extract(i32 %c) { ; CHECK-LABEL: @fabs_select_positive_constants_vector_extract( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00) ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[SELECT]], i32 0 ; CHECK-NEXT: ret float [[EXTRACT]] ; diff --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll index e9d5c353cbccfc3..45e329bca0b08f0 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll @@ -285,7 +285,7 @@ define i1 @UIToFP_is_nan_or_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_is_nan_or_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_is_nan_or_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp uge <2 x float> %a, zeroinitializer @@ -303,7 +303,7 @@ define i1 @UIToFP_is_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_is_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp oge <2 x float> %a, zeroinitializer @@ -321,7 +321,7 @@ define i1 @UIToFP_nnan_is_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_nnan_is_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp nnan oge <2 x float> %a, zeroinitializer @@ -395,7 +395,7 @@ define i1 @fabs_is_nan_or_positive_or_zero(double %x) { define <2 x i1> @fabs_is_nan_or_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_is_nan_or_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp uge <2 x double> %fabs, zeroinitializer @@ -413,7 +413,7 @@ define i1 @fabs_nnan_is_positive_or_zero(double %x) { define <2 x i1> @fabs_nnan_is_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call nnan <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp oge <2 x double> %fabs, zeroinitializer @@ -459,7 +459,7 @@ define i1 @fabs_fcmp_olt0_-assume-nnan_is_positive_or_zero(double %x) { define <2 x i1> @fabs_fcmp-nnan_is_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_fcmp-nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp nnan oge <2 x double> %fabs, zeroinitializer @@ -826,7 +826,7 @@ define <2 x i1> @minnum_uge_small_min_constant(<2 x float> %x) { define <2 x i1> @minnum_olt_small_min_constant(<2 x float> %x) { ; CHECK-LABEL: @minnum_olt_small_min_constant( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %min = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> ) %cmp = fcmp olt <2 x float> %min, @@ -999,7 +999,7 @@ define <2 x i1> @maxnum_ule_large_max_constant(<2 x float> %x) { define <2 x i1> @maxnum_ogt_large_max_constant(<2 x float> %x) { ; CHECK-LABEL: @maxnum_ogt_large_max_constant( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> ) %cmp = fcmp ogt <2 x float> %max, @@ -1307,7 +1307,7 @@ define i1 @assumed_positive_une_with_negative_constant(float %a) { define <2 x i1> @known_positive_uge_with_negative_constant_splat_vec(<2 x float> %a) { ; CHECK-LABEL: @known_positive_uge_with_negative_constant_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %cmp = fcmp uge <2 x float> %call, @@ -1325,7 +1325,7 @@ define i1 @known_positive_oeq_with_negative_constant(half %a) { define <2 x i1> @known_positive_une_with_negative_constant_splat_vec(<2 x i32> %a) { ; CHECK-LABEL: @known_positive_une_with_negative_constant_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %call = uitofp <2 x i32> %a to <2 x half> %cmp = fcmp une <2 x half> %call, @@ -1377,7 +1377,7 @@ define <2 x i1> @orderedCompareWithNaNVector_poison_elt(<2 x double> %A) { define <2 x i1> @unorderedCompareWithNaNVector_poison_elt(<2 x double> %A) { ; CHECK-LABEL: @unorderedCompareWithNaNVector_poison_elt( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = fcmp ult <2 x double> %A, ret <2 x i1> %cmp @@ -1486,7 +1486,7 @@ define i1 @is_finite_or_nan(i1 %c, double %x) { define <2 x i1> @is_finite_or_nan_commute(<2 x i8> %x) { ; CHECK-LABEL: @is_finite_or_nan_commute( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cast = uitofp <2 x i8> %x to <2 x float> %r = fcmp une <2 x float> , %cast @@ -1533,7 +1533,7 @@ define i1 @is_finite_assume(i1 %c, double %x) { define <2 x i1> @is_finite_commute(<2 x i8> %x) { ; CHECK-LABEL: @is_finite_commute( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cast = uitofp <2 x i8> %x to <2 x float> %r = fcmp one <2 x float> , %cast diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 668a93ddf5a4264..fff6cfd8a3b4bc0 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -196,7 +196,7 @@ define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_maxnum_const_inf_nnan_comm_vec( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF0000000000000) ; %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -204,7 +204,7 @@ define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_maximum_const_inf_nnan_comm_vec( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF0000000000000) ; %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -727,7 +727,7 @@ define float @minnum_neginf(float %x) { define <2 x double> @minnum_neginf_commute_vec(<2 x double> %x) { ; CHECK-LABEL: @minnum_neginf_commute_vec( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0xFFF0000000000000) ; %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %r @@ -813,7 +813,7 @@ define float @maxnum_x_y_maxnum_z(float %x, float %y, float %z) { define <2 x double> @maxnum_inf(<2 x double> %x) { ; CHECK-LABEL: @maxnum_inf( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF0000000000000) ; %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) ret <2 x double> %val @@ -915,7 +915,7 @@ define <2 x double> @minimum_nan_op1_vec_partial_poison(<2 x double> %x) { define <2 x double> @minimum_nan_op1_vec(<2 x double> %x) { ; CHECK-LABEL: @minimum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF800DEAD00DEAD) ; %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r @@ -1164,7 +1164,7 @@ define float @minimum_neginf(float %x) { define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { ; CHECK-LABEL: @minimum_neginf_commute_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> splat (double 0xFFF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[R]] ; %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) @@ -1185,7 +1185,7 @@ define float @minimum_inf(float %x) { define <2 x double> @maximum_inf(<2 x double> %x) { ; CHECK-LABEL: @maximum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; CHECK-NEXT: [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double 0x7FF0000000000000)) ; CHECK-NEXT: ret <2 x double> [[VAL]] ; %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double>) diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index 06b23200bafff81..22d01ac8c2ad1a7 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -103,7 +103,7 @@ define @fmul_nan_op0_scalable_vec_1( define <2 x float> @fmul_nan_op1(<2 x float> %x) { ; CHECK-LABEL: @fmul_nan_op1( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF8000000000000) ; %r = fmul <2 x float> %x, ret <2 x float> %r @@ -121,7 +121,7 @@ define @fmul_nan_op1_scalable_vec( %x define <2 x double> @fdiv_nan_op0(<2 x double> %x) { ; CHECK-LABEL: @fdiv_nan_op0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0xFFF800000000000F) ; %r = fdiv <2 x double> , %x ret <2 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/fptoi-range.ll b/llvm/test/Transforms/InstSimplify/fptoi-range.ll index 95f2a9d50793c64..f9b28347ca9f945 100644 --- a/llvm/test/Transforms/InstSimplify/fptoi-range.ll +++ b/llvm/test/Transforms/InstSimplify/fptoi-range.ll @@ -157,7 +157,7 @@ define i1 @f16_ui_min2(half %f) { define <2 x i1> @v2f16_si_max(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_max( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i32> @@ -177,7 +177,7 @@ define <2 x i1> @v2f16_si_max2(<2 x half> %f) { define <2 x i1> @v2f16_si16_max2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si16_max2( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i16> @@ -187,7 +187,7 @@ define <2 x i1> @v2f16_si16_max2(<2 x half> %f) { define <2 x i1> @v2f16_si_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_min1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptosi <2 x half> %f to <2 x i32> %c = icmp sge <2 x i32> %i, @@ -197,7 +197,7 @@ define <2 x i1> @v2f16_si_min1(<2 x half> %f) { define <2 x i1> @v2f16_si16_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si16_min1( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i16> [[I]], splat (i16 32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i16> @@ -208,7 +208,7 @@ define <2 x i1> @v2f16_si16_min1(<2 x half> %f) { define <2 x i1> @v2f16_si_min2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_min2( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[I]], splat (i32 -65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i32> @@ -219,7 +219,7 @@ define <2 x i1> @v2f16_si_min2(<2 x half> %f) { define <2 x i1> @v2f16_ui_max1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui_max1( ; CHECK-NEXT: [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptoui <2 x half> %f to <2 x i32> @@ -239,7 +239,7 @@ define <2 x i1> @v2f16_ui_max2(<2 x half> %f) { define <2 x i1> @v2f16_ui16_max2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui16_max2( ; CHECK-NEXT: [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptoui <2 x half> %f to <2 x i16> @@ -249,7 +249,7 @@ define <2 x i1> @v2f16_ui16_max2(<2 x half> %f) { define <2 x i1> @v2f16_ui16_max3(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui16_max3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptoui <2 x half> %f to <2 x i16> %c = icmp ule <2 x i16> %i, @@ -258,7 +258,7 @@ define <2 x i1> @v2f16_ui16_max3(<2 x half> %f) { define <2 x i1> @v2f16_ui_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui_min1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptoui <2 x half> %f to <2 x i32> %c = icmp sge <2 x i32> %i, diff --git a/llvm/test/Transforms/InstSimplify/frexp.ll b/llvm/test/Transforms/InstSimplify/frexp.ll index 63fe1dec6929331..34cfce92bac43e9 100644 --- a/llvm/test/Transforms/InstSimplify/frexp.ll +++ b/llvm/test/Transforms/InstSimplify/frexp.ll @@ -9,8 +9,8 @@ declare { , } @llvm.frexp.nxv2f32.nxv2i32 define { float, i32 } @frexp_frexp(float %x) { -; CHECK-LABEL: define { float, i32 } @frexp_frexp -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define { float, i32 } @frexp_frexp( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) ; CHECK-NEXT: ret { float, i32 } [[FREXP0]] ; @@ -21,8 +21,8 @@ define { float, i32 } @frexp_frexp(float %x) { } define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) { -; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector -; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector( +; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[X]]) ; CHECK-NEXT: ret { <2 x float>, <2 x i32> } [[FREXP0]] ; @@ -33,8 +33,8 @@ define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) { } define { float, i32 } @frexp_frexp_const(float %x) { -; CHECK-LABEL: define { float, i32 } @frexp_frexp_const -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define { float, i32 } @frexp_frexp_const( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: ret { float, i32 } { float 6.562500e-01, i32 0 } ; %frexp0 = call { float, i32 } @llvm.frexp.f32.i32(float 42.0) @@ -44,8 +44,8 @@ define { float, i32 } @frexp_frexp_const(float %x) { } define { , } @frexp_frexp_scalable_vector( %x) { -; CHECK-LABEL: define { , } @frexp_frexp_scalable_vector -; CHECK-SAME: ( [[X:%.*]]) { +; CHECK-LABEL: define { , } @frexp_frexp_scalable_vector( +; CHECK-SAME: [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { , } @llvm.frexp.nxv2f32.nxv2i32( [[X]]) ; CHECK-NEXT: ret { , } [[FREXP0]] ; @@ -252,7 +252,7 @@ define { ppc_fp128, i32} @canonicalize_noncanonical_zero_1_ppcf128() { define { <2 x float>, <2 x i32> } @frexp_splat_4() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_4() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 5.000000e-01), <2 x i32> splat (i32 3) } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -260,7 +260,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_4() { define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF8000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -268,7 +268,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { define { <2 x float>, <2 x i32> } @frexp_splat_inf() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_inf() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF0000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -276,7 +276,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_inf() { define { <2 x float>, <2 x i32> } @frexp_splat_neginf() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_neginf() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0xFFF0000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index f682fe82ca7986e..276707894146e87 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -206,7 +206,7 @@ define ptr @ptr_idx_scalar() { define <2 x ptr> @ptr_idx_vector() { ; CHECK-LABEL: @ptr_idx_vector( -; CHECK-NEXT: ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> splat (i64 1)) ; %gep = getelementptr i32, ptr null, <2 x i64> ret <2 x ptr> %gep @@ -224,7 +224,7 @@ define <4 x ptr> @ptr_idx_mix_scalar_vector(){ define <4 x ptr> @vector_idx_scalar() { ; CHECK-LABEL: @vector_idx_scalar( -; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1)) ; %gep = getelementptr i32, <4 x ptr> zeroinitializer, i64 1 ret <4 x ptr> %gep @@ -232,7 +232,7 @@ define <4 x ptr> @vector_idx_scalar() { define <4 x ptr> @vector_idx_vector() { ; CHECK-LABEL: @vector_idx_vector( -; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1)) ; %gep = getelementptr i32, <4 x ptr> zeroinitializer, <4 x i64> ret <4 x ptr> %gep @@ -370,6 +370,14 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr } define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) { +; CHECK-LABEL: @gep_array_of_scalable_vectors_ptrdiff( +; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds [8 x ], ptr [[PTR:%.*]], i64 4 +; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [8 x ], ptr [[PTR]], i64 6 +; CHECK-NEXT: [[C1_INT:%.*]] = ptrtoint ptr [[C1]] to i64 +; CHECK-NEXT: [[C2_INT:%.*]] = ptrtoint ptr [[C2]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[C2_INT]], [[C1_INT]] +; CHECK-NEXT: ret i64 [[DIFF]] +; %c1 = getelementptr inbounds [8 x ], ptr %ptr, i64 4 %c2 = getelementptr inbounds [8 x ], ptr %ptr, i64 6 %c1.int = ptrtoint ptr %c1 to i64 diff --git a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll index a501f995b6c9757..350093dcda82d94 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll @@ -31,7 +31,7 @@ define <2 x i1> @eq_f(<2 x i1> %a) { define <2 x i1> @ne_t(<2 x i1> %a) { ; CHECK-LABEL: @ne_t( -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp ne <2 x i1> %a, @@ -72,7 +72,7 @@ define <2 x i1> @ugt_f(<2 x i1> %a) { define <2 x i1> @ult_t(<2 x i1> %a) { ; CHECK-LABEL: @ult_t( -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp ult <2 x i1> %a, @@ -89,7 +89,7 @@ define <2 x i1> @ult_f(<2 x i1> %a) { define <2 x i1> @sgt_t(<2 x i1> %a) { ; CHECK-LABEL: @sgt_t( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp sgt <2 x i1> %a, @@ -130,7 +130,7 @@ define <2 x i1> @uge_t(<2 x i1> %a) { define <2 x i1> @uge_f(<2 x i1> %a) { ; CHECK-LABEL: @uge_f( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp uge <2 x i1> %a, ret <2 x i1> %r @@ -138,7 +138,7 @@ define <2 x i1> @uge_f(<2 x i1> %a) { define <2 x i1> @ule_t(<2 x i1> %a) { ; CHECK-LABEL: @ule_t( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp ule <2 x i1> %a, ret <2 x i1> %r @@ -155,7 +155,7 @@ define <2 x i1> @ule_f(<2 x i1> %a) { define <2 x i1> @sge_t(<2 x i1> %a) { ; CHECK-LABEL: @sge_t( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sge <2 x i1> %a, ret <2 x i1> %r @@ -163,7 +163,7 @@ define <2 x i1> @sge_t(<2 x i1> %a) { define <2 x i1> @sge_t_poison_elt(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_poison_elt( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sge <2 x i1> %a, ret <2 x i1> %r @@ -188,7 +188,7 @@ define <2 x i1> @sle_t(<2 x i1> %a) { define <2 x i1> @sle_f(<2 x i1> %a) { ; CHECK-LABEL: @sle_f( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sle <2 x i1> %a, ret <2 x i1> %r diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll index 4f3a5d3dba3b5c2..21244f832cbb94a 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll @@ -13,7 +13,7 @@ define i1 @tautological_ule(i8 %x) { define <2 x i1> @tautological_ule_vec(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ule_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ule <2 x i8> %x, ret <2 x i1> %cmp @@ -21,7 +21,7 @@ define <2 x i1> @tautological_ule_vec(<2 x i8> %x) { define <2 x i1> @tautological_ule_vec_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ule_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ule <2 x i8> %x, ret <2 x i1> %cmp @@ -63,7 +63,7 @@ define i1 @urem3(i32 %X) { define <2 x i1> @urem3_vec(<2 x i32> %X) { ; CHECK-LABEL: @urem3_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = urem <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -72,7 +72,7 @@ define <2 x i1> @urem3_vec(<2 x i32> %X) { define <2 x i1> @urem3_vec_partial_poison(<2 x i32> %X) { ; CHECK-LABEL: @urem3_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = urem <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -138,7 +138,7 @@ define i1 @udiv1(i32 %X) { define <2 x i1> @udiv1_vec(<2 x i32> %X) { ; CHECK-LABEL: @udiv1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = udiv <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -177,7 +177,7 @@ define i1 @sdiv1(i32 %X) { define <2 x i1> @sdiv1_vec(<2 x i32> %X) { ; CHECK-LABEL: @sdiv1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = sdiv <2 x i32> %X, %B = icmp slt <2 x i32> %A, @@ -196,7 +196,7 @@ define i1 @shl5(i32 %X) { define <2 x i1> @shl5_vec(<2 x i32> %X) { ; CHECK-LABEL: @shl5_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nuw <2 x i32> , %X %cmp = icmp ugt <2 x i32> %sub, @@ -205,7 +205,7 @@ define <2 x i1> @shl5_vec(<2 x i32> %X) { define <2 x i1> @shl5_vec_partial_poison(<2 x i32> %X) { ; CHECK-LABEL: @shl5_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nuw <2 x i32> , %X %cmp = icmp ugt <2 x i32> %sub, @@ -243,7 +243,7 @@ define i1 @shl4(i32 %X) { define <2 x i1> @shl4_vec(<2 x i32> %X) { ; CHECK-LABEL: @shl4_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nsw <2 x i32> , %X %cmp = icmp sle <2 x i32> %sub, @@ -262,7 +262,7 @@ define i1 @icmp_shl_nsw_1(i64 %a) { define <2 x i1> @icmp_shl_nsw_1_vec(<2 x i64> %a) { ; CHECK-LABEL: @icmp_shl_nsw_1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl nsw <2 x i64> , %a %cmp = icmp sge <2 x i64> %shl, zeroinitializer @@ -605,7 +605,7 @@ define i1 @tautological9(i32 %x) { define <2 x i1> @tautological9_vec(<2 x i32> %x) { ; CHECK-LABEL: @tautological9_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i32> %x, %cmp = icmp ne <2 x i32> %add, @@ -826,7 +826,7 @@ define i1 @add_nsw_pos_const6(i32 %x) { define <2 x i1> @add_nsw_pos_const5_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @add_nsw_pos_const5_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nsw <2 x i32> %x, %cmp = icmp ne <2 x i32> %add, @@ -891,7 +891,7 @@ define i1 @mul_nuw_urem_cmp_constant1(i8 %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -902,7 +902,7 @@ define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat(<2 x i8> %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat_poison1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -913,7 +913,7 @@ define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison2(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -998,7 +998,7 @@ define i1 @mul_nsw_srem_cmp_constant1(i8 %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1009,7 +1009,7 @@ define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat(<2 x i8> %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat_poison1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1020,7 +1020,7 @@ define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison2(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1215,7 +1215,7 @@ define <2 x i1> @icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 10) %i) { define <2 x i1> @neg_icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 11) %i) { ; CHECK-LABEL: @neg_icmp_eq_constant_range_attr_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp eq <2 x i8> %i, @@ -1238,7 +1238,7 @@ define <2 x i1> @icmp_eq_constant_range_return_vec() { define <2 x i1> @neg_icmp_eq_constant_range_return_vec() { ; CHECK-LABEL: @neg_icmp_eq_constant_range_return_vec( ; CHECK-NEXT: [[I:%.*]] = call <2 x i8> @returns_contain_ten_range_helper_vec() -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i = call <2 x i8> @returns_contain_ten_range_helper_vec() @@ -1261,7 +1261,7 @@ define <2 x i1> @icmp_eq_constant_range_call_vec() { define <2 x i1> @neg_icmp_eq_constant_range_call_vec() { ; CHECK-LABEL: @neg_icmp_eq_constant_range_call_vec( ; CHECK-NEXT: [[I:%.*]] = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec() -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec() diff --git a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll index 045d773bf32841f..e8b047d51df3011 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll @@ -7,7 +7,7 @@ define <2 x i1> @eq_t_not(<2 x i1> %a) { ; CHECK-LABEL: @eq_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -71,7 +71,7 @@ define <2 x i1> @ne_t_not_poison(<2 x i1> %a) { define <2 x i1> @ne_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ne_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -90,7 +90,7 @@ define <2 x i1> @ugt_t_not(<2 x i1> %a) { define <2 x i1> @ugt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ugt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -181,7 +181,7 @@ define <2 x i1> @slt_t_not(<2 x i1> %a) { define <2 x i1> @slt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @slt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -191,7 +191,7 @@ define <2 x i1> @slt_f_not(<2 x i1> %a) { define <2 x i1> @uge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -201,7 +201,7 @@ define <2 x i1> @uge_t_not(<2 x i1> %a) { define <2 x i1> @uge_f_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp uge <2 x i1> %not, @@ -210,7 +210,7 @@ define <2 x i1> @uge_f_not(<2 x i1> %a) { define <2 x i1> @ule_t_not(<2 x i1> %a) { ; CHECK-LABEL: @ule_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp ule <2 x i1> %not, @@ -246,7 +246,7 @@ define <2 x i1> @ule_f_not_poison(<2 x i1> %a) { define <2 x i1> @sge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sge <2 x i1> %not, @@ -282,7 +282,7 @@ define <2 x i1> @sge_f_not_poison(<2 x i1> %a) { define <2 x i1> @sle_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -292,7 +292,7 @@ define <2 x i1> @sle_t_not(<2 x i1> %a) { define <2 x i1> @sle_f_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sle <2 x i1> %not, diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll index c94922197096f27..2f2e0d32669d797 100644 --- a/llvm/test/Transforms/InstSimplify/icmp.ll +++ b/llvm/test/Transforms/InstSimplify/icmp.ll @@ -232,7 +232,7 @@ define i1 @sub_swap(i8 %x) { define <2 x i1> @sub_odd(<2 x i8> %x) { ; CHECK-LABEL: @sub_odd( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = sub <2 x i8> , %x %cmp = icmp ne <2 x i8> %sub, %x @@ -241,7 +241,7 @@ define <2 x i1> @sub_odd(<2 x i8> %x) { define <2 x i1> @sub_odd_poison(<2 x i8> %x) { ; CHECK-LABEL: @sub_odd_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = sub <2 x i8> , %x %cmp = icmp ne <2 x i8> %sub, %x diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll index 7e3cb656bce158e..53d8d79add3016f 100644 --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -99,7 +99,7 @@ define i1 @test4(i32 %length.i, i32 %i) { ; A ==> A for vectors define <4 x i1> @test5(<4 x i1> %vec) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %res = icmp ule <4 x i1> %vec, %vec ret <4 x i1> %res diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll index 3fe8b8331a40c07..40c075bf1ea6672 100644 --- a/llvm/test/Transforms/InstSimplify/insertelement.ll +++ b/llvm/test/Transforms/InstSimplify/insertelement.ll @@ -122,7 +122,7 @@ unreachable_infloop: define <4 x i32> @insert_into_splat(i32 %index) { ; CHECK-LABEL: @insert_into_splat( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 3) ; %I = insertelement <4 x i32> , i32 3, i32 %index ret <4 x i32> %I diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll index 4d662c08b1a7a16..af83f00368597f8 100644 --- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll @@ -1123,7 +1123,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1148,7 +1148,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1173,7 +1173,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1198,7 +1198,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 965c333d306d146..1445402d1feb20e 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -192,7 +192,7 @@ define <4 x i1> @shuf_nonzero_both(<4 x i8> %xx, <4 x i8> %yy) { define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) { ; CHECK-LABEL: @shuf_nonzero_both_fail( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer @@ -208,8 +208,8 @@ define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) { define <4 x i1> @shuf_nonzero_both_fail2(<4 x i8> %xx, <4 x i8> %yy) { ; CHECK-LABEL: @shuf_nonzero_both_fail2( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add <4 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1) +; CHECK-NEXT: [[Y:%.*]] = add <4 x i8> [[YY:%.*]], splat (i8 1) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] @@ -390,7 +390,7 @@ define i1 @nonzero_reduce_or(<2 x i8> %xx) { define i1 @nonzero_reduce_or_fail(<2 x i8> %xx) { ; CHECK-LABEL: @nonzero_reduce_or_fail( -; CHECK-NEXT: [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 ; CHECK-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 5c659a1e7086a63..ddd4140b3aeddf3 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -99,7 +99,7 @@ define i3 @smin_poison(i3 %x) { define <2 x i8> @umax_undef(<2 x i8> %x) { ; CHECK-LABEL: @umax_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> %x) ret <2 x i8> %r @@ -107,7 +107,7 @@ define <2 x i8> @umax_undef(<2 x i8> %x) { define <2 x i8> @umax_poison(<2 x i8> %x) { ; CHECK-LABEL: @umax_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> poison, <2 x i8> %x) ret <2 x i8> %r @@ -139,7 +139,7 @@ define i8 @smax_maxval(i8 %x) { define <2 x i8> @smax_maxval_commute(<2 x i8> %x) { ; CHECK-LABEL: @smax_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -155,7 +155,7 @@ define i8 @smin_minval(i8 %x) { define <2 x i8> @smin_minval_commute(<2 x i8> %x) { ; CHECK-LABEL: @smin_minval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) ret <2 x i8> %r @@ -171,7 +171,7 @@ define i8 @umax_maxval(i8 %x) { define <2 x i8> @umax_maxval_commute(<2 x i8> %x) { ; CHECK-LABEL: @umax_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -259,7 +259,7 @@ define <2 x i8> @umin_maxval_commute(<2 x i8> %x) { define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @smax_maxval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -267,7 +267,7 @@ define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) { define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @smin_minval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) ret <2 x i8> %r @@ -275,7 +275,7 @@ define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) { define <2 x i8> @umax_maxval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @umax_maxval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -709,7 +709,7 @@ define i8 @umax_umax_constants_commute2(i8 %x) { define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) { ; CHECK-LABEL: @umax_umax_constants_commute3( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> splat (i8 -2), <2 x i8> [[X:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) @@ -739,7 +739,7 @@ define i8 @umin_umin_constants_commute1(i8 %x) { define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_commute2( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -769,7 +769,7 @@ define i8 @smax_smax_constants(i8 %x) { define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_commute1( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 7), <2 x i8> [[X:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) @@ -799,7 +799,7 @@ define i8 @smax_smax_constants_commute3(i8 %x) { define <2 x i8> @smin_smin_constants(<2 x i8> %x) { ; CHECK-LABEL: @smin_smin_constants( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 7)) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -842,7 +842,7 @@ define i8 @smin_smin_constants_commute3(i8 %x) { define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> , <2 x i8> [[M]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]]) ; CHECK-NEXT: ret <2 x i8> [[M2]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -855,7 +855,7 @@ define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[M]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]]) ; CHECK-NEXT: ret <2 x i8> [[M2]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> ) diff --git a/llvm/test/Transforms/InstSimplify/negate.ll b/llvm/test/Transforms/InstSimplify/negate.ll index d07029becd1fe9a..95d8f46ac5d249a 100644 --- a/llvm/test/Transforms/InstSimplify/negate.ll +++ b/llvm/test/Transforms/InstSimplify/negate.ll @@ -64,7 +64,7 @@ define i8 @negate_zero_or_minsigned(i8 %x) { define <2 x i8> @negate_zero_or_minsigned_vec(<2 x i8> %x) { ; CHECK-LABEL: @negate_zero_or_minsigned_vec( -; CHECK-NEXT: [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[SIGNBIT]] ; %signbit = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll b/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll index fb67ba309876fcd..440bc7497e0963c 100644 --- a/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll +++ b/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll @@ -1192,7 +1192,7 @@ define i1 @ult_ult(i8 %a, i8 %b) { define <2 x i1> @ult_uge_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @ult_uge_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp1 = icmp ult <2 x i8> %a, %b %cmp2 = icmp uge <2 x i8> %a, %b diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index f241c6987b9e708..ae60e2def0797c0 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -19,7 +19,7 @@ define i32 @all_ones(i32 %A) { define <3 x i8> @all_ones_vec_with_poison_elt(<3 x i8> %A) { ; CHECK-LABEL: @all_ones_vec_with_poison_elt( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %B = or <3 x i8> %A, ret <3 x i8> %B @@ -70,7 +70,7 @@ define i32 @or_not(i32 %A) { define <2 x i4> @or_not_commute_vec_poison(<2 x i4> %A) { ; CHECK-LABEL: @or_not_commute_vec_poison( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %NotA = xor <2 x i4> %A, %B = or <2 x i4> %NotA, %A @@ -130,7 +130,7 @@ define i8 @test11(i8 %A) { define i8 @test11v(<2 x i8> %A) { ; CHECK-LABEL: @test11v( ; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CV:%.*]] = xor <2 x i8> [[B]], +; CHECK-NEXT: [[CV:%.*]] = xor <2 x i8> [[B]], splat (i8 13) ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i8> [[CV]], i32 0 ; CHECK-NEXT: [[D:%.*]] = or i8 [[C]], 1 ; CHECK-NEXT: [[E:%.*]] = xor i8 [[D]], 12 @@ -221,7 +221,7 @@ define i117 @test6_apint(i117 %X) { ; replace with V+N. define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) { ; CHECK-LABEL: @test7_apint( -; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], +; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], splat (i39 -274877906944) ; CHECK-NEXT: [[A:%.*]] = add <2 x i39> [[N]], [[V:%.*]] ; CHECK-NEXT: ret <2 x i39> [[A]] ; @@ -241,7 +241,7 @@ define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) { ; replace with V+N. define <2 x i399> @test8_apint(<2 x i399> %V, <2 x i399> %M) { ; CHECK-LABEL: @test8_apint( -; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], +; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], splat (i399 18446742974197923840) ; CHECK-NEXT: [[A:%.*]] = add <2 x i399> [[N]], [[V:%.*]] ; CHECK-NEXT: ret <2 x i399> [[A]] ; @@ -320,7 +320,7 @@ define i8 @or_with_not_op_commute2(i8 %a, i8 %b) { define <3 x i17> @or_with_not_op_commute3(<3 x i17> %a, <3 x i17> %b) { ; CHECK-LABEL: @or_with_not_op_commute3( -; CHECK-NEXT: ret <3 x i17> +; CHECK-NEXT: ret <3 x i17> splat (i17 -1) ; %ab = and <3 x i17> %a, %b %not = xor <3 x i17> %ab, @@ -332,7 +332,7 @@ define <3 x i17> @or_with_not_op_commute3(<3 x i17> %a, <3 x i17> %b) { define <2 x i1> @or_with_not_op_commute4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_with_not_op_commute4( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ab = and <2 x i1> %b, %a %not = xor <2 x i1> %ab, @@ -391,7 +391,7 @@ define i8 @and_or_not_or_commute2(i8 %A, i8 %B) { define <2 x i4> @and_or_not_or_commute3(<2 x i4> %A, <2 x i4> %B) { ; CHECK-LABEL: @and_or_not_or_commute3( -; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOTA]] ; %nota = xor <2 x i4> %A, @@ -443,7 +443,7 @@ define i8 @and_or_not_or_commute6(i8 %A, i8 %B) { define <2 x i4> @and_or_not_or_commute7(<2 x i4> %A, <2 x i4> %B) { ; CHECK-LABEL: @and_or_not_or_commute7( -; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOTA]] ; %nota = xor <2 x i4> %A, @@ -503,7 +503,7 @@ define <2 x i4> @and_or_not_or_commute7_undef_elt(<2 x i4> %A, <2 x i4> %B) { ; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[B:%.*]], [[NOTA]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i4> [[B]], [[A]] -; CHECK-NEXT: [[NOTAB:%.*]] = xor <2 x i4> [[OR]], +; CHECK-NEXT: [[NOTAB:%.*]] = xor <2 x i4> [[OR]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[NOTAB]], [[AND]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -608,7 +608,7 @@ define i32 @shifted_all_ones_commute(i32 %shamt) { define <2 x i9> @shifted_all_ones_sub_on_lshr(<2 x i9> %shamt) { ; CHECK-LABEL: @shifted_all_ones_sub_on_lshr( -; CHECK-NEXT: ret <2 x i9> +; CHECK-NEXT: ret <2 x i9> splat (i9 -1) ; %l = shl <2 x i9> , %shamt %s = sub <2 x i9> , %shamt @@ -697,7 +697,7 @@ define i4 @or_nxor_and_commute0(i4 %a, i4 %b) { define <2 x i4> @or_nxor_and_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_and_commute1( ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %a, %b @@ -723,7 +723,7 @@ define i74 @or_nxor_and_commute2(i74 %a, i74 %b) { define <2 x i4> @or_nxor_and_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_and_commute3( ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %b, %a @@ -814,7 +814,7 @@ define i4 @or_nxor_or_commute0(i4 %a, i4 %b) { define <2 x i4> @or_nxor_or_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_commute1( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %a, %b %xor = xor <2 x i4> %a, %b @@ -836,7 +836,7 @@ define i74 @or_nxor_or_commute2(i74 %a, i74 %b) { define <2 x i4> @or_nxor_or_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_commute3( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %b, %a %xor = xor <2 x i4> %a, %b @@ -883,7 +883,7 @@ define i4 @or_nxor_or_wrong_val2(i4 %a, i4 %b, i4 %c) { define <2 x i4> @or_nxor_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_poison_elt( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %b, %a %xor = xor <2 x i4> %a, %b @@ -998,7 +998,7 @@ define i32 @or_xor_not_op_or_commute7(i32 %a, i32 %b){ define <2 x i4> @or_xor_not_op_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_xor_not_op_or_poison_elt( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %xor = xor <2 x i4> %a, %b %nota = xor <2 x i4> %a, @@ -1042,7 +1042,7 @@ define i4 @or_nand_xor(i4 %x, i4 %y) { define <2 x i4> @or_nand_xor_commute1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @or_nand_xor_commute1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NAND]] ; %and = and <2 x i4> %y, %x diff --git a/llvm/test/Transforms/InstSimplify/pr28725.ll b/llvm/test/Transforms/InstSimplify/pr28725.ll index 81884c9270ac2a3..9de5bed6aa8cee6 100644 --- a/llvm/test/Transforms/InstSimplify/pr28725.ll +++ b/llvm/test/Transforms/InstSimplify/pr28725.ll @@ -6,7 +6,7 @@ define <2 x i16> @test1() { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> to <2 x i16>), i32 0), 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> splat (i32 1) to <2 x i16>), i32 0), 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], [[S:%.*]] zeroinitializer, [[S]] { i16 0, i32 1 } ; CHECK-NEXT: [[E:%.*]] = extractvalue [[S]] [[SEL]], 0 ; CHECK-NEXT: [[B:%.*]] = insertelement <2 x i16> , i16 [[E]], i32 0 diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index d2c4a5dd7f03531..5e7c636d6231809 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -170,7 +170,7 @@ define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { define <2 x ptr> @ptrmask_simplify_aligned_unused_vec(<2 x ptr> align 128 %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec ; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -212,8 +212,8 @@ define ptr @ptrmask_simplify_known_unused(ptr %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -225,8 +225,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec2 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -239,7 +239,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec3 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -266,7 +266,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) { ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) ; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -278,8 +278,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; @@ -293,8 +293,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll index 5ec803c6d0481e6..4fb1d9167ab7b34 100644 --- a/llvm/test/Transforms/InstSimplify/rem.ll +++ b/llvm/test/Transforms/InstSimplify/rem.ll @@ -505,7 +505,7 @@ define <2 x i8> @simplfy_srem_of_mul(<2 x i8> %x) { define <2 x i8> @simplfy_srem_of_mul_fail_bad_mod(<2 x i8> %x) { ; CHECK-LABEL: @simplfy_srem_of_mul_fail_bad_mod( ; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = srem <2 x i8> [[MUL]], +; CHECK-NEXT: [[R:%.*]] = srem <2 x i8> [[MUL]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %mul = mul nsw <2 x i8> %x, diff --git a/llvm/test/Transforms/InstSimplify/returned.ll b/llvm/test/Transforms/InstSimplify/returned.ll index 2da1052e7d4d898..cff12c5ea406c99 100644 --- a/llvm/test/Transforms/InstSimplify/returned.ll +++ b/llvm/test/Transforms/InstSimplify/returned.ll @@ -38,7 +38,7 @@ define <8 x i1> @returned_vec_arg_casted(<2 x i32> %a) { define <8 x i1> @returned_vec_arg_casted2(<2 x i32> %a) { ; CHECK-LABEL: @returned_vec_arg_casted2( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> [[OR]]) ; CHECK-NEXT: [[C:%.*]] = icmp ne <8 x i8> [[X]], zeroinitializer ; CHECK-NEXT: ret <8 x i1> [[C]] diff --git a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll index ea139f2411cc3df..25a35062e60b8dc 100644 --- a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll @@ -58,7 +58,7 @@ define i8 @uadd_scalar_maxval(i8 %a) { define <2 x i9> @uadd_vector_maxval(<2 x i9> %a) { ; CHECK-LABEL: @uadd_vector_maxval( -; CHECK-NEXT: ret <2 x i9> +; CHECK-NEXT: ret <2 x i9> splat (i9 -1) ; %x3v = call <2 x i9> @llvm.uadd.sat.v2i9(<2 x i9> %a, <2 x i9> ) ret <2 x i9> %x3v @@ -74,7 +74,7 @@ define i3 @uadd_scalar_maxval_commute(i3 %a) { define <2 x i8> @uadd_vector_maxval_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x4v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> %a) ret <2 x i8> %x4v @@ -98,7 +98,7 @@ define i8 @uadd_scalar_poison(i8 %a) { define <2 x i8> @uadd_vector_undef(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) ret <2 x i8> %x5v @@ -106,7 +106,7 @@ define <2 x i8> @uadd_vector_undef(<2 x i8> %a) { define <2 x i8> @uadd_vector_poison(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) ret <2 x i8> %x5v @@ -130,7 +130,7 @@ define i8 @uadd_scalar_poison_commute(i8 %a) { define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_undef_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a) ret <2 x i8> %x5v @@ -138,7 +138,7 @@ define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) { define <2 x i8> @uadd_vector_poison_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_poison_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a) ret <2 x i8> %x5v @@ -187,7 +187,7 @@ define i8 @sadd_scalar_maxval(i8 %a) { define <2 x i8> @sadd_vector_maxval(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_maxval( -; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[Y3V]] ; %y3v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -230,7 +230,7 @@ define i8 @sadd_scalar_poison(i8 %a) { define <2 x i8> @sadd_vector_undef(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> undef) ret <2 x i8> %y5v @@ -238,7 +238,7 @@ define <2 x i8> @sadd_vector_undef(<2 x i8> %a) { define <2 x i8> @sadd_vector_poison(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> poison) ret <2 x i8> %y5v @@ -262,7 +262,7 @@ define i8 @sadd_scalar_poison_commute(i8 %a) { define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_undef_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a) ret <2 x i8> %y6v @@ -270,7 +270,7 @@ define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) { define <2 x i8> @sadd_vector_poison_commute(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_poison_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a) ret <2 x i8> %y6v @@ -449,7 +449,7 @@ define i8 @ssub_scalar_maxval(i8 %a) { define <2 x i8> @ssub_vector_maxval(<2 x i8> %a) { ; CHECK-LABEL: @ssub_vector_maxval( -; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[Y3V]] ; %y3v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -833,7 +833,7 @@ define i1 @uadd_ult(i8 %x, i8 %y) { define <2 x i1> @uadd_uge_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @uadd_uge_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sat = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) %cmp = icmp uge <2 x i8> %sat, %x diff --git a/llvm/test/Transforms/InstSimplify/sdiv.ll b/llvm/test/Transforms/InstSimplify/sdiv.ll index 99092802cab0256..ec93e4d2fb6cbb5 100644 --- a/llvm/test/Transforms/InstSimplify/sdiv.ll +++ b/llvm/test/Transforms/InstSimplify/sdiv.ll @@ -12,7 +12,7 @@ define i32 @negated_operand(i32 %x) { define <2 x i32> @negated_operand_commute_vec(<2 x i32> %x) { ; CHECK-LABEL: @negated_operand_commute_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %negx = sub nsw <2 x i32> zeroinitializer, %x %div = sdiv <2 x i32> %negx, %x @@ -31,7 +31,7 @@ define i32 @knownnegation(i32 %x, i32 %y) { define <2 x i32> @knownnegation_commute_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @knownnegation_commute_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %xy = sub nsw <2 x i32> %x, %y %yx = sub nsw <2 x i32> %y, %x @@ -160,7 +160,7 @@ define <2 x i32> @knownnegation_commute_vec_bad3(<2 x i32> %x, <2 x i32> %y) { define <3 x i32> @negated_operand_vec_poison(<3 x i32> %x) { ; CHECK-LABEL: @negated_operand_vec_poison( -; CHECK-NEXT: ret <3 x i32> +; CHECK-NEXT: ret <3 x i32> splat (i32 -1) ; %negx = sub nsw <3 x i32> , %x %div = sdiv <3 x i32> %negx, %x diff --git a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll index 33670128af439c9..6d48afc40135151 100644 --- a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll @@ -59,7 +59,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) { ; CHECK-LABEL: @equal_arms_vec_poison( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 42) ; %V = select <2 x i1> %cond, <2 x i32> , <2 x i32> ret <2 x i32> %V @@ -265,7 +265,7 @@ define i32 @test11(i32 %X) { define <2 x i8> @test11vec(<2 x i8> %X) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %cmp = icmp sgt <2 x i8> %X, @@ -658,8 +658,8 @@ define i1 @and_cmps(i32 %x) { define <2 x i1> @and_cmps_vector(<2 x i32> %x) { ; CHECK-LABEL: @and_cmps_vector( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/select-logical.ll b/llvm/test/Transforms/InstSimplify/select-logical.ll index 1c11bb8982eb11f..6b5cef7b8e26ce6 100644 --- a/llvm/test/Transforms/InstSimplify/select-logical.ll +++ b/llvm/test/Transforms/InstSimplify/select-logical.ll @@ -238,7 +238,7 @@ define <3 x i1> @logical_or_not_and_vector1(<3 x i1> %x, <3 x i1> %y) { define <3 x i1> @logical_or_not_and_vector1_poison1(<3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @logical_or_not_and_vector1_poison1( ; CHECK-NEXT: [[L_AND:%.*]] = select <3 x i1> [[X:%.*]], <3 x i1> , <3 x i1> [[Y:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i1> [[L_AND]], +; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i1> [[L_AND]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[NOT]], <3 x i1> [[X]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -299,7 +299,7 @@ define i1 @logical_nand_logical_or_common_op_commute1(i1 %x, i1 %y) { define <2 x i1> @logical_nand_logical_or_common_op_commute2(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %and = select <2 x i1> %y, <2 x i1> %x, <2 x i1> zeroinitializer %nand = xor <2 x i1> %and, @@ -309,7 +309,7 @@ define <2 x i1> @logical_nand_logical_or_common_op_commute2(<2 x i1> %x, <2 x i1 define <2 x i1> @logical_nand_logical_or_common_op_commute3(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %and = select <2 x i1> %x, <2 x i1> %y, <2 x i1> zeroinitializer %nand = xor <2 x i1> %and, @@ -332,8 +332,8 @@ define i1 @logical_nand_logical_or_common_op_commute4(i1 %x, i1 %y) { define <2 x i1> @logical_nand_logical_or_common_op_commute4_poison_vec(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute4_poison_vec( ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i1> [[X:%.*]], <2 x i1> -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i1> [[AND]], -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[X]], <2 x i1> , <2 x i1> [[NAND]] +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i1> [[AND]], splat (i1 true) +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[X]], <2 x i1> splat (i1 true), <2 x i1> [[NAND]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %and = select <2 x i1> %y, <2 x i1> %x, <2 x i1> @@ -666,7 +666,7 @@ define <2 x i1> @or_same_op(<2 x i1> %x) { define <2 x i1> @always_true_same_op(<2 x i1> %x) { ; CHECK-LABEL: @always_true_same_op( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = select <2 x i1> %x, <2 x i1> %x, <2 x i1> ret <2 x i1> %r @@ -714,7 +714,7 @@ define <2 x i1> @or_and_common_op_commute2(<2 x i1> %x, <2 x i1> %y) { define <2 x i1> @or_and_common_op_commute2_poison(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @or_and_common_op_commute2_poison( ; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> [[Y:%.*]], <2 x i1> -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y]], <2 x i1> , <2 x i1> [[A]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y]], <2 x i1> splat (i1 true), <2 x i1> [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %a = select <2 x i1> %x, <2 x i1> %y, <2 x i1> diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 4618e1143dc8f97..5f160aa1658616b 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -67,7 +67,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) { ; CHECK-LABEL: @equal_arms_vec_poison( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 42) ; %V = select <2 x i1> %cond, <2 x i32> , <2 x i32> ret <2 x i32> %V @@ -319,7 +319,7 @@ define i32 @test11(i32 %X) { define <2 x i8> @test11vec(<2 x i8> %X) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %cmp = icmp sgt <2 x i8> %X, @@ -712,8 +712,8 @@ define i1 @and_cmps(i32 %x) { define <2 x i1> @and_cmps_vector(<2 x i32> %x) { ; CHECK-LABEL: @and_cmps_vector( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/select_or_and.ll b/llvm/test/Transforms/InstSimplify/select_or_and.ll index 6bb7fbf5dc41d4f..41b3353df57268a 100644 --- a/llvm/test/Transforms/InstSimplify/select_or_and.ll +++ b/llvm/test/Transforms/InstSimplify/select_or_and.ll @@ -204,7 +204,7 @@ define i32 @select_icmp_and_eq_commuted(i32 %a, i32 %b) { ; https://alive2.llvm.org/ce/z/HfYXvx define <2 x i16> @select_icmp_and_eq_vec(<2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: @select_icmp_and_eq_vec( -; CHECK-NEXT: ret <2 x i16> +; CHECK-NEXT: ret <2 x i16> splat (i16 -1) ; %and = and <2 x i16> %a, %b %tobool = icmp eq <2 x i16> %and, diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll index 6bf03779379ec73..3917172e3b752bf 100644 --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -264,7 +264,7 @@ define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) { define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @shl_vector_for_real( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 3) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; @@ -318,7 +318,7 @@ define i32 @lshr_cttz_zero_is_undef(i32 %x) { define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec( ; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true) @@ -342,7 +342,7 @@ define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) { define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec( ; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true) diff --git a/llvm/test/Transforms/InstSimplify/shift.ll b/llvm/test/Transforms/InstSimplify/shift.ll index a816fcbdeeee001..a6de24e24f63c43 100644 --- a/llvm/test/Transforms/InstSimplify/shift.ll +++ b/llvm/test/Transforms/InstSimplify/shift.ll @@ -115,7 +115,7 @@ define i32 @ashr_all_ones(i32 %A) { define <3 x i8> @ashr_all_ones_vec_with_poison_elts(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @ashr_all_ones_vec_with_poison_elts( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %sh = ashr <3 x i8> , %y ret <3 x i8> %sh @@ -225,9 +225,9 @@ define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @shl_or_shr2v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 31) ; CHECK-NEXT: [[T4:%.*]] = or <2 x i64> [[T2]], [[T3]] -; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i64> [[T4]], +; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i64> [[T4]], splat (i64 31) ; CHECK-NEXT: ret <2 x i64> [[T5]] ; %t1 = zext <2 x i32> %a to <2 x i64> @@ -297,7 +297,7 @@ define i32 @all_ones_left_right(i32 %x) { define <2 x i7> @all_ones_left_right_splat(<2 x i7> %x) { ; CHECK-LABEL: @all_ones_left_right_splat( -; CHECK-NEXT: ret <2 x i7> +; CHECK-NEXT: ret <2 x i7> splat (i7 -1) ; %left = shl <2 x i7> , %x %right = ashr <2 x i7> %left, %x @@ -319,7 +319,7 @@ define <3 x i7> @all_ones_left_right_splat_undef_elt(<3 x i7> %x) { define <3 x i7> @all_ones_left_right_splat_poison__elt(<3 x i7> %x) { ; CHECK-LABEL: @all_ones_left_right_splat_poison__elt( -; CHECK-NEXT: ret <3 x i7> +; CHECK-NEXT: ret <3 x i7> splat (i7 -1) ; %left = shl <3 x i7> , %x %right = ashr <3 x i7> %left, %x diff --git a/llvm/test/Transforms/InstSimplify/shr-nop.ll b/llvm/test/Transforms/InstSimplify/shr-nop.ll index 90d49ccb312bcbd..29fe222faaae3e9 100644 --- a/llvm/test/Transforms/InstSimplify/shr-nop.ll +++ b/llvm/test/Transforms/InstSimplify/shr-nop.ll @@ -415,7 +415,7 @@ define <2 x i4097> @ashr_zero_vec(<2 x i4097> %shiftval) { define <2 x i64> @ashr_minus1_vec(<2 x i64> %shiftval) { ; CHECK-LABEL: @ashr_minus1_vec( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %shr = ashr <2 x i64> , %shiftval ret <2 x i64> %shr diff --git a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll index 49cacdc1e8d0f65..e7b54947587cd02 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll @@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) { define <4 x i32> @const_folding1(<4 x i32> %x) { ; CHECK-LABEL: @const_folding1( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 5) ; %shuf = shufflevector <4 x i32> , <4 x i32> %x, <4 x i32> zeroinitializer ret <4 x i32> %shuf @@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) { define <5 x i8> @splat_inserted_constant(<4 x i8> %x) { ; CHECK-LABEL: @splat_inserted_constant( -; CHECK-NEXT: ret <5 x i8> +; CHECK-NEXT: ret <5 x i8> splat (i8 42) ; %ins3 = insertelement <4 x i8> %x, i8 42, i64 3 %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> poison, <5 x i32> diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll index 201950516160504..0442bd721974b5a 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll @@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) { define <4 x i32> @const_folding1(<4 x i32> %x) { ; CHECK-LABEL: @const_folding1( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 5) ; %shuf = shufflevector <4 x i32> , <4 x i32> %x, <4 x i32> zeroinitializer ret <4 x i32> %shuf @@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) { define <5 x i8> @splat_inserted_constant(<4 x i8> %x) { ; CHECK-LABEL: @splat_inserted_constant( -; CHECK-NEXT: ret <5 x i8> +; CHECK-NEXT: ret <5 x i8> splat (i8 42) ; %ins3 = insertelement <4 x i8> %x, i8 42, i64 3 %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> undef, <5 x i32> @@ -296,7 +296,7 @@ define <4 x i32> @fold_identity(<4 x i32> %x) { define <4 x i32> @fold_identity2(<4 x i32> %x) { ; CHECK-LABEL: @fold_identity2( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[SHL]] ; %shl = shl <4 x i32> %x, diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll index 98422706f653bfa..d75c00e04c4ebf1 100644 --- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll @@ -36,7 +36,7 @@ define float @fadd_x_n0_ebmaytrap(float %a) #0 { define <2 x float> @fadd_vec_x_n0_ebmaytrap(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_ebmaytrap( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -54,7 +54,7 @@ define float @fadd_x_n0_ebstrict(float %a) #0 { define <2 x float> @fadd_vec_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -72,7 +72,7 @@ define float @fadd_x_n0_neginf(float %a) #0 { define <2 x float> @fadd_vec_x_n0_neginf(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_neginf( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -90,7 +90,7 @@ define float @fadd_x_n0_dynamic(float %a) #0 { define <2 x float> @fadd_vec_x_n0_dynamic(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_dynamic( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 @@ -144,7 +144,7 @@ define float @fadd_nnan_x_n0_ebstrict(float %a) #0 { define <2 x float> @fadd_vec_nnan_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_nnan_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[A]] ; %ret = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -164,7 +164,7 @@ define float @fadd_ninf_x_n0_ebstrict(float %a) #0 { ; Test with a fast math flag set but that flag is not "nnan". define <2 x float> @fadd_vec_ninf_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_ninf_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -200,7 +200,7 @@ define float @fadd_n0_x_ebmaytrap(float %a) #0 { ; TODO: Canonicalize the order of the arguments. Then this will fire. define <2 x float> @fadd_vec_n0_x_ebmaytrap(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_n0_x_ebmaytrap( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll index 47720060acb5288..9b4f8dd7a5a4bef 100644 --- a/llvm/test/Transforms/InstSimplify/uscmp.ll +++ b/llvm/test/Transforms/InstSimplify/uscmp.ll @@ -83,7 +83,7 @@ define i8 @ucmp_undef() { define <4 x i8> @ucmp_lt_splat() { ; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() { -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 -1) ; %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3)) ret <4 x i8> %1 @@ -222,7 +222,7 @@ define i8 @ucmp_with_addition2(i32 %x) { define <4 x i8> @ucmp_with_addition_vec(<4 x i32> %x) { ; CHECK-LABEL: define <4 x i8> @ucmp_with_addition_vec( ; CHECK-SAME: <4 x i32> [[X:%.*]]) { -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 -1) ; %1 = add nuw <4 x i32> %x, splat(i32 1) %2 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %1) diff --git a/llvm/test/Transforms/InstSimplify/vec-cmp.ll b/llvm/test/Transforms/InstSimplify/vec-cmp.ll index dce1261d7031c9f..b2ae4b82287d5a7 100644 --- a/llvm/test/Transforms/InstSimplify/vec-cmp.ll +++ b/llvm/test/Transforms/InstSimplify/vec-cmp.ll @@ -12,7 +12,7 @@ define <2 x i1> @nonzero_vec_splat(<2 x i32> %x) { define <2 x i1> @nonzero_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @nonzero_vec_nonsplat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %y = or <2 x i32> %x, %c = icmp ne <2 x i32> %y, zeroinitializer @@ -65,7 +65,7 @@ define <2 x i1> @nonzero_vec_mul_nuw(<2 x i32> %x, <2 x i32> %y) { ; Multiplies of non-zero numbers are non-zero if there is no signed overflow. define <2 x i1> @nonzero_vec_mul_nsw(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @nonzero_vec_mul_nsw( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xnz = or <2 x i32> %x, %ynz = or <2 x i32> %y, diff --git a/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll b/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll index 4acf2fba1934f40..8b27ab1f0ef2674 100644 --- a/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll +++ b/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll @@ -63,7 +63,7 @@ define <2 x i1> @icmp_eq_zext_unused(<2 x i8> %x) { define <2 x i1> @icmp_ne_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ne_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ne <2 x i32> %xext, @@ -72,7 +72,7 @@ define <2 x i1> @icmp_ne_zext_is_true(<2 x i8> %x) { define <2 x i1> @icmp_ult_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ult_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ult <2 x i32> %xext, @@ -81,7 +81,7 @@ define <2 x i1> @icmp_ult_zext_is_true(<2 x i8> %x) { define <2 x i1> @icmp_ule_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ule_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ule <2 x i32> %xext, @@ -150,7 +150,7 @@ define <2 x i1> @icmp_eq_sext_fail(<2 x i8> %x) { define <2 x i1> @icmp_ne_sext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ne_sext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = sext <2 x i8> %x to <2 x i32> %cmp = icmp ne <2 x i32> %xext, diff --git a/llvm/test/Transforms/InstSimplify/vector_gep.ll b/llvm/test/Transforms/InstSimplify/vector_gep.ll index a1d0bd379aa7402..c373e15f19e8221 100644 --- a/llvm/test/Transforms/InstSimplify/vector_gep.ll +++ b/llvm/test/Transforms/InstSimplify/vector_gep.ll @@ -56,7 +56,7 @@ define <4 x ptr> @test4(<4 x ptr> %a) { define <4 x ptr> @test5() { ; CHECK-LABEL: define <4 x ptr> @test5() { -; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> splat (i64 1)) ; %c = inttoptr <4 x i64> to <4 x ptr> %gep = getelementptr i8, <4 x ptr> %c, <4 x i32> diff --git a/llvm/test/Transforms/InstSimplify/xor.ll b/llvm/test/Transforms/InstSimplify/xor.ll index 229e943a3836f21..eaa016b6614e131 100644 --- a/llvm/test/Transforms/InstSimplify/xor.ll +++ b/llvm/test/Transforms/InstSimplify/xor.ll @@ -23,7 +23,7 @@ define i4 @xor_and_or_not_commute0(i4 %a, i4 %b) { define <2 x i4> @xor_and_or_not_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @xor_and_or_not_commute1( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %a, %b @@ -47,7 +47,7 @@ define i74 @xor_and_or_not_commute2(i74 %a, i74 %b) { define <2 x i4> @xor_and_or_not_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @xor_and_or_not_commute3( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %b, %a diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll index 167d282edb3e72f..41dcf2a06372300 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll @@ -12,7 +12,7 @@ define <2 x double> @shuffle_binop_fol(ptr %ptr) { ; CHECK-NEXT: vector.body.preheader: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8 ; CHECK-NEXT: [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> , <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> splat (double 1.000000e+00), <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]] ; CHECK-NEXT: ret <2 x double> [[FADD3]] ; @@ -27,7 +27,7 @@ define <2 x double> @shuffle_binop_fol_oob(ptr %ptr) { ; CHECK-LABEL: @shuffle_binop_fol_oob( ; CHECK-NEXT: vector.body.preheader: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], +; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], splat (double 1.000000e+00) ; CHECK-NEXT: [[EXTRACTED:%.*]] = shufflevector <4 x double> [[FADD]], <4 x double> undef, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[EXTRACTED]] ; diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll index f38f948c2a2e505..e6a8af60f12872d 100644 --- a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll @@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) { ; CHECK-NEXT: [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half> ; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4 ; CHECK-NEXT: [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4 -; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], +; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000) ; CHECK-NEXT: [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32 ; CHECK-NEXT: store i32 [[C_INT]], ptr [[CIDX]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll index b5316f553c21b74..56b910eebea92f1 100644 --- a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll @@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) { ; CHECK-NEXT: [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half> ; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4 ; CHECK-NEXT: [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4 -; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], +; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000) ; CHECK-NEXT: [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32 ; CHECK-NEXT: store i32 [[C_INT]], ptr [[CIDX]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll index 2bb6f05b91b1ab2..3673b1b4d2241a5 100644 --- a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll +++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll @@ -28,7 +28,7 @@ define void @unroll_upper(ptr noundef %pSrc, ptr nocapture noundef writeonly %pD ; CHECK-NEXT: [[NEXT_GEP37:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[AND]]) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP37]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison) -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], splat (i16 8) ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP3]], ptr [[NEXT_GEP]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll index d0041668e77bdeb..456875ecb7d1fcc 100644 --- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll @@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> , [[VEC_IND12]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]] ; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], -; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> , [[VEC_IND_NEXT13]] +; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16) +; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]] ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]] ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], +; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32) ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]] @@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] +; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll index c2a8675f7ebbab1..cd4198f8160f737 100644 --- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll @@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> , [[VEC_IND12]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]] ; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], -; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> , [[VEC_IND_NEXT13]] +; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16) +; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]] ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]] ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], +; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32) ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]] @@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] +; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll index cec9aa42d469350..ddf6c1005e05867 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll @@ -27,7 +27,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0 @@ -217,11 +217,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index dcb8ba736616679..4f050877bd1316f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -21,8 +21,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> , <2 x i64> [[TMP6]], <2 x i64> ) -; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> , <2 x i64> [[TMP7]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1)) +; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 7f325ce1a1f04b3..c6e58326158a372 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -89,8 +89,8 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 ; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]]) ; DEFAULT-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD1]]) -; DEFAULT-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], -; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], +; DEFAULT-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], splat (double 1.000000e+00) +; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], splat (double 1.000000e+00) ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DEFAULT: pred.store.if: @@ -390,7 +390,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 99) ; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 ; PRED-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; PRED: pred.store.if: @@ -456,7 +456,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue14: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: @@ -744,7 +744,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], +; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1) ; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double> ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0 ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8 @@ -869,7 +869,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6) ; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DEFAULT: pred.store.if: @@ -942,7 +942,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]] ; DEFAULT: pred.store.continue14: -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: @@ -971,7 +971,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] ; PRED-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6) ; PRED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; PRED-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; PRED: pred.store.if: @@ -1044,7 +1044,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]] ; PRED: pred.store.continue14: -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: @@ -1258,7 +1258,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE27]] ; DEFAULT: pred.store.continue27: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; DEFAULT: middle.block: @@ -1480,8 +1480,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: pred.store.continue27: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) -; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 ; PRED-NEXT: br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; PRED: middle.block: @@ -1562,8 +1562,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20) +; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -1594,7 +1594,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DEFAULT: pred.store.continue6: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; DEFAULT: middle.block: @@ -1625,8 +1625,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20) +; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; PRED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -1657,7 +1657,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue6: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 0f33e8fa79ce7b7..2ba460255c607dd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -33,14 +33,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw <16 x i16> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], splat (i16 8) ; CHECK-NEXT: [[TMP7:%.*]] = trunc nuw <16 x i16> [[TMP6]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw <16 x i16> [[TMP9]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 8) ; CHECK-NEXT: [[TMP12:%.*]] = trunc nuw <16 x i16> [[TMP11]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -66,14 +66,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16> ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7]] to <8 x i16> ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], +; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], splat (i16 8) ; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6]] ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16> ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]] -; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], splat (i16 8) ; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX6]], 8 @@ -162,43 +162,43 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i16> [[TMP4]] to <16 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP12]], align 1 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], -; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]] -; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[INDEX7]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]] +; CHECK-NEXT: store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i32 [[INDEX7]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 -; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -240,43 +240,43 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 % ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], splat (i16 8) +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16 -; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 -; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul <8 x i16> [[TMP12]], +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[TMP13]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], splat (i16 8) +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[C]], <8 x i16> [[TMP15]], <8 x i16> [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i16> [[TMP16]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]] -; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[INDEX3]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]] +; CHECK-NEXT: store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000 -; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000 +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -464,8 +464,8 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP1]], align 4 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 7749bb9edb124ea..7c6a881ead10877 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -124,7 +124,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 @@ -132,7 +132,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -159,7 +159,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <2 x i64> [[VEC_IND6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -213,7 +213,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP1]] @@ -222,7 +222,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -252,7 +252,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i64> [[VEC_IND11]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -301,17 +301,17 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10) +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP4]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END4]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -335,11 +335,11 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX7]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], +; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], splat (i64 10) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -393,7 +393,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64 @@ -403,7 +403,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP3]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -427,7 +427,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -475,7 +475,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 @@ -483,7 +483,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP2]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -506,7 +506,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000 ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index e9c9288e7343946..86a9af6fd5a3cdb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -16,7 +16,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -65,8 +65,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002) -; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index eafe6921b37ca71..547e7afbefcafed 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -14,8 +14,8 @@ define double @test_reduction_costs() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> ) -; CHECK-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> ) +; CHECK-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00)) +; CHECK-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -214,8 +214,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE74:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -240,8 +240,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] ; CHECK: [[PRED_STORE_CONTINUE47]]: -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 @@ -274,8 +274,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> -; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) +; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]] ; CHECK: [[PRED_STORE_IF59]]: @@ -304,8 +304,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE66]] ; CHECK: [[PRED_STORE_CONTINUE66]]: -; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], +; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) +; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP32]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = or <2 x i1> [[TMP22]], [[TMP34]] @@ -313,29 +313,29 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]] ; CHECK: [[PRED_STORE_IF67]]: -; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE68]] ; CHECK: [[PRED_STORE_CONTINUE68]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 ; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]] ; CHECK: [[PRED_STORE_IF69]]: -; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE70]] ; CHECK: [[PRED_STORE_CONTINUE70]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]] ; CHECK: [[PRED_STORE_IF71]]: -; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE72]] ; CHECK: [[PRED_STORE_CONTINUE72]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 ; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]] ; CHECK: [[PRED_STORE_IF73]]: -; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE74]] ; CHECK: [[PRED_STORE_CONTINUE74]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 553989544c7787d..2f756ab4b0e1abf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -355,8 +355,8 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue2: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: middle.block: @@ -549,8 +549,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue8: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) -; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0 ; PRED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: middle.block: @@ -746,8 +746,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue7: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0 ; PRED-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index cd7662a657dfe61..8f40c31382a8602 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -30,8 +30,8 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1) +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[VEC_PHI1]], splat (i32 -1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -225,7 +225,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0 @@ -233,7 +233,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP4]], align 1 ; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], splat (i8 8) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 192 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -269,153 +269,69 @@ exit: } define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { -; DEFAULT-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( -; DEFAULT-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { -; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; DEFAULT-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; DEFAULT-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] -; DEFAULT-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] -; DEFAULT: vector.body: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; DEFAULT-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 -; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] -; DEFAULT-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; DEFAULT-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; DEFAULT-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[WIDE_LOAD]], -; DEFAULT-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[WIDE_LOAD7]], -; DEFAULT-NEXT: [[TMP10:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64> -; DEFAULT-NEXT: [[TMP11:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64> -; DEFAULT-NEXT: [[TMP12:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> -; DEFAULT-NEXT: [[TMP13:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> -; DEFAULT-NEXT: [[TMP14:%.*]] = shl <4 x i64> [[TMP10]], [[TMP12]] -; DEFAULT-NEXT: [[TMP15]] = shl <4 x i64> [[TMP11]], [[TMP13]] -; DEFAULT-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP14]], <4 x i32> -; DEFAULT-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP15]], <4 x i32> -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; DEFAULT: middle.block: -; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP15]], i32 2 -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP15]], i32 3 -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; DEFAULT-NEXT: br label [[LOOP:%.*]] -; DEFAULT: loop: -; DEFAULT-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; DEFAULT-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 -; DEFAULT-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 -; DEFAULT-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 -; DEFAULT-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] -; DEFAULT-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 -; DEFAULT-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 -; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] -; DEFAULT: exit: -; DEFAULT-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; DEFAULT-NEXT: ret i64 [[P_LCSSA]] -; -; PRED-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( -; PRED-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { -; PRED-NEXT: entry: -; PRED-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; PRED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; PRED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; PRED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; PRED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; PRED: vector.ph: -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] -; PRED-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; PRED-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 -; PRED-NEXT: br label [[VECTOR_BODY:%.*]] -; PRED: vector.body: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; PRED-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; PRED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 -; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] -; PRED-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; PRED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; PRED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; PRED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; PRED-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; PRED-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[WIDE_LOAD]], -; PRED-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[WIDE_LOAD7]], -; PRED-NEXT: [[TMP10:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64> -; PRED-NEXT: [[TMP11:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64> -; PRED-NEXT: [[TMP12:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> -; PRED-NEXT: [[TMP13:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> -; PRED-NEXT: [[TMP14:%.*]] = shl <4 x i64> [[TMP10]], [[TMP12]] -; PRED-NEXT: [[TMP15]] = shl <4 x i64> [[TMP11]], [[TMP13]] -; PRED-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP14]], <4 x i32> -; PRED-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP15]], <4 x i32> -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; PRED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; PRED: middle.block: -; PRED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP15]], i32 3 -; PRED-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP15]], i32 2 -; PRED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; PRED: scalar.ph: -; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] -; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; PRED-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; PRED-NEXT: br label [[LOOP:%.*]] -; PRED: loop: -; PRED-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] -; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; PRED-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 -; PRED-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 -; PRED-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 -; PRED-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] -; PRED-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 -; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 -; PRED-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] -; PRED: exit: -; PRED-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; PRED-NEXT: ret i64 [[P_LCSSA]] +; CHECK-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> +; CHECK-NEXT: [[TMP10]] = shl <4 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 +; CHECK-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 +; CHECK-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 +; CHECK-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 +; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[P]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[P_LCSSA]] ; entry: br label %loop @@ -441,72 +357,56 @@ exit: } define void @zext_iv_increment(ptr %dst, i64 %N) { -; DEFAULT-LABEL: define void @zext_iv_increment( -; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { -; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; DEFAULT: vector.scevcheck: -; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; DEFAULT-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 -; DEFAULT-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]] -; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1 -; DEFAULT-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; DEFAULT-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; DEFAULT-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] -; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] -; DEFAULT: vector.body: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2 -; DEFAULT-NEXT: store i32 0, ptr [[TMP9]], align 8 -; DEFAULT-NEXT: store i32 0, ptr [[TMP10]], align 8 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP173_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: br label [[FOR_BODY174:%.*]] -; DEFAULT: loop: -; DEFAULT-NEXT: [[I167_0800:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC179:%.*]], [[FOR_BODY174]] ] -; DEFAULT-NEXT: [[CONV169801:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[CONV169:%.*]], [[FOR_BODY174]] ] -; DEFAULT-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[CONV169801]], i32 2 -; DEFAULT-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 -; DEFAULT-NEXT: [[INC179]] = add i32 [[I167_0800]], 1 -; DEFAULT-NEXT: [[CONV169]] = zext i32 [[INC179]] to i64 -; DEFAULT-NEXT: [[CMP172:%.*]] = icmp ult i64 [[CONV169]], [[N]] -; DEFAULT-NEXT: br i1 [[CMP172]], label [[FOR_BODY174]], label [[FOR_COND_CLEANUP173_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; DEFAULT: exit: -; DEFAULT-NEXT: ret void -; -; PRED-LABEL: define void @zext_iv_increment( -; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { -; PRED-NEXT: entry: -; PRED-NEXT: br label [[FOR_BODY174:%.*]] -; PRED: loop: -; PRED-NEXT: [[I167_0800:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC179:%.*]], [[FOR_BODY174]] ] -; PRED-NEXT: [[CONV169801:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[CONV169:%.*]], [[FOR_BODY174]] ] -; PRED-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[CONV169801]], i32 2 -; PRED-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 -; PRED-NEXT: [[INC179]] = add i32 [[I167_0800]], 1 -; PRED-NEXT: [[CONV169]] = zext i32 [[INC179]] to i64 -; PRED-NEXT: [[CMP172:%.*]] = icmp ult i64 [[CONV169]], [[N]] -; PRED-NEXT: br i1 [[CMP172]], label [[FOR_BODY174]], label [[FOR_COND_CLEANUP173_LOOPEXIT:%.*]] -; PRED: exit: -; PRED-NEXT: ret void +; CHECK-LABEL: define void @zext_iv_increment( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT_EXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[IV_WIDE]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT_EXT]] = zext i32 [[IV_NEXT]] to i64 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_NEXT_EXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -536,4 +436,8 @@ exit: ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 1a4ed0f21bf4c56..292d8e2077d2025 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -27,7 +27,7 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 @@ -47,21 +47,21 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD5]], splat (i8 2) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -130,7 +130,7 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 @@ -150,21 +150,21 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD5]], splat (i8 2) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -224,7 +224,7 @@ define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 @@ -301,7 +301,7 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2 @@ -321,22 +321,22 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16> -; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[TMP12]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD5]] to <8 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[TMP12]], splat (i16 2) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP15]], align 2 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -401,7 +401,7 @@ define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4 @@ -483,12 +483,12 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4) +; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51) +; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60) ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4) ; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] @@ -508,36 +508,36 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8> -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX11]], 0 +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX10]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD12]], -; CHECK-NEXT: [[TMP23:%.*]] = add <8 x i8> [[TMP22]], -; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD12]], -; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD11]], splat (i8 4) +; CHECK-NEXT: [[TMP23:%.*]] = add <8 x i8> [[TMP22]], splat (i8 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD11]], splat (i8 51) +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], splat (i8 60) ; CHECK-NEXT: [[TMP26:%.*]] = and <8 x i8> [[TMP23]], [[TMP17]] -; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP25]], +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP25]], splat (i8 -4) ; CHECK-NEXT: [[TMP28:%.*]] = xor <8 x i8> [[TMP27]], [[TMP18]] ; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], [[TMP26]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP29]], ptr [[TMP31]], align 1 -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]] +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8 +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -632,13 +632,13 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4) +; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32) +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51) +; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60) ; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]] -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4) ; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] @@ -658,38 +658,38 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8> -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0 +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX10]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2 -; CHECK-NEXT: [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD12]] to <8 x i8> -; CHECK-NEXT: [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[TMP25]], -; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP28:%.*]] = or <8 x i8> [[TMP27]], -; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2 +; CHECK-NEXT: [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD11]] to <8 x i8> +; CHECK-NEXT: [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], splat (i8 4) +; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[TMP25]], splat (i8 32) +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP24]], splat (i8 -52) +; CHECK-NEXT: [[TMP28:%.*]] = or <8 x i8> [[TMP27]], splat (i8 51) +; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], splat (i8 60) ; CHECK-NEXT: [[TMP30:%.*]] = and <8 x i8> [[TMP26]], [[TMP19]] -; CHECK-NEXT: [[TMP31:%.*]] = and <8 x i8> [[TMP29]], +; CHECK-NEXT: [[TMP31:%.*]] = and <8 x i8> [[TMP29]], splat (i8 -4) ; CHECK-NEXT: [[TMP32:%.*]] = xor <8 x i8> [[TMP31]], [[TMP20]] ; CHECK-NEXT: [[TMP33:%.*]] = mul <8 x i8> [[TMP32]], [[TMP30]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP33]], ptr [[TMP35]], align 1 -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8 -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]] +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -776,14 +776,14 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -798,10 +798,10 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] @@ -859,14 +859,14 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 @@ -877,15 +877,15 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[A_PHI_LCSSA]] to i8 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SCALAR_RECUR]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 2b6933654ac1ae3..29795bc9f298216 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -26,7 +26,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -34,15 +34,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body @@ -92,22 +92,22 @@ for.end10: ; preds = %for.inc8 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> ) +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> splat (i1 true)) ; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> -; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2 -; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], splat (i64 2) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body ; Function Attrs: norecurse nounwind uwtable diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll index a8caae40aed1617..4c31cfc14afb303 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll @@ -47,7 +47,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; SC_SVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; SC_SVE-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], [[VEC_PHI]] ; SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SC_SVE: middle.block: @@ -122,7 +122,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; NO_SC_SVE-NEXT: [[TMP16:%.*]] = shl <8 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; NO_SC_SVE-NEXT: [[TMP17]] = add <8 x i32> [[TMP16]], [[VEC_PHI]] ; NO_SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; NO_SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SC_SVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index c9db83b935fb8e2..fb5d513dfbd75e5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -115,9 +115,9 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-UNORDERED-LABEL: @fadd_strict_unroll ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD3:.*]] = load <8 x float>, ptr @@ -195,9 +195,9 @@ define float @fadd_strict_unroll_last_val(ptr noalias nocapture readonly %a, ptr ; CHECK-UNORDERED-LABEL: @fadd_strict_unroll_last_val ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD3:.*]] = load <8 x float>, ptr @@ -273,8 +273,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED: %[[LOADA1:.*]] = load float, ptr %a ; CHECK-UNORDERED: %[[LOADA2:.*]] = load float, ptr %[[ARRAYIDX]] ; CHECK-UNORDERED: vector.ph -; CHECK-UNORDERED: %[[INS2:.*]] = insertelement <4 x float> , float %[[LOADA2]], i32 0 -; CHECK-UNORDERED: %[[INS1:.*]] = insertelement <4 x float> , float %[[LOADA1]], i32 0 +; CHECK-UNORDERED: %[[INS2:.*]] = insertelement <4 x float> splat (float -0.000000e+00), float %[[LOADA2]], i32 0 +; CHECK-UNORDERED: %[[INS1:.*]] = insertelement <4 x float> splat (float -0.000000e+00), float %[[LOADA1]], i32 0 ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <4 x float> [ %[[INS2]], %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <4 x float> [ %[[INS1]], %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] @@ -401,8 +401,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED: br i1 %[[EXTRACT]], label %pred.load.if, label %pred.load.continue ; CHECK-ORDERED: pred.load.continue6 ; CHECK-ORDERED: %[[PHI1:.*]] = phi <4 x float> [ %[[PHI0:.*]], %pred.load.continue4 ], [ %[[INS_ELT:.*]], %pred.load.if5 ] -; CHECK-ORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], -; CHECK-ORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> , <4 x float> %[[PHI1]] +; CHECK-ORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], splat (i1 true) +; CHECK-ORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> splat (float 3.000000e+00), <4 x float> %[[PHI1]] ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[PHI]], <4 x float> %[[PRED]]) ; CHECK-ORDERED: for.body ; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[MERGE_RDX:.*]], %scalar.ph ], [ %[[FADD:.*]], %for.inc ] @@ -427,8 +427,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED: %[[EXTRACT:.*]] = extractelement <4 x i1> %[[FCMP1]], i32 0 ; CHECK-UNORDERED: br i1 %[[EXTRACT]], label %pred.load.if, label %pred.load.continue ; CHECK-UNORDERED: pred.load.continue6 -; CHECK-UNORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], -; CHECK-UNORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> , <4 x float> %[[PRED_PHI:.*]] +; CHECK-UNORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], splat (i1 true) +; CHECK-UNORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> splat (float 3.000000e+00), <4 x float> %[[PRED_PHI:.*]] ; CHECK-UNORDERED: %[[VEC_FADD]] = fadd <4 x float> %[[PHI]], %[[PRED]] ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd ; CHECK-UNORDERED: middle.block @@ -489,7 +489,7 @@ define float @fadd_predicated(ptr noalias nocapture %a, i64 %n) { ; CHECK-ORDERED: %[[RDX_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %pred.load.continue2 ] ; CHECK-ORDERED: pred.load.continue2 ; CHECK-ORDERED: %[[PHI:.*]] = phi <2 x float> [ %[[PHI0:.*]], %pred.load.continue ], [ %[[INS_ELT:.*]], %pred.load.if1 ] -; CHECK-ORDERED: %[[MASK:.*]] = select <2 x i1> %0, <2 x float> %[[PHI]], <2 x float> +; CHECK-ORDERED: %[[MASK:.*]] = select <2 x i1> %0, <2 x float> %[[PHI]], <2 x float> splat (float -0.000000e+00) ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v2f32(float %[[RDX_PHI]], <2 x float> %[[MASK]]) ; CHECK-ORDERED: for.end: ; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD:.*]], %for.body ], [ %[[RDX]], %middle.block ] @@ -544,7 +544,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-LABEL: @fadd_multiple ; CHECK-UNORDERED: vector.body -; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]] ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr @@ -593,7 +593,7 @@ define float @fadd_multiple_one_flag(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-UNORDERED-LABEL: @fadd_multiple_one_flag ; CHECK-UNORDERED: vector.body -; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]] ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr @@ -693,7 +693,7 @@ define float @fast_induction_and_reduction(ptr nocapture readonly %values, float ; CHECK-ORDERED: %[[IND_PHI:.*]] = phi <4 x float> [ %[[INDUCTION]], %vector.ph ], [ %[[VEC_IND_NEXT:.*]], %vector.body ] ; CHECK-ORDERED: %[[LOAD1:.*]] = load <4 x float>, ptr ; CHECK-ORDERED: %[[FADD1:.*]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[RDX_PHI]], <4 x float> %[[LOAD1]]) -; CHECK-ORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], +; CHECK-ORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], splat (float 8.000000e+00) ; CHECK-ORDERED: for.body ; CHECK-ORDERED: %[[RDX_SUM_PHI:.*]] = phi float [ {{.*}}, %scalar.ph ], [ %[[FADD2:.*]], %for.body ] ; CHECK-ORDERED: %[[IND_SUM_PHI:.*]] = phi fast float [ {{.*}}, %scalar.ph ], [ %[[ADD_IND:.*]], %for.body ] @@ -713,7 +713,7 @@ define float @fast_induction_and_reduction(ptr nocapture readonly %values, float ; CHECK-UNORDERED: %[[IND_PHI:.*]] = phi <4 x float> [ %[[INDUCTION]], %vector.ph ], [ %[[VEC_IND_NEXT:.*]], %vector.body ] ; CHECK-UNORDERED: %[[LOAD1:.*]] = load <4 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD]] = fadd <4 x float> %[[RDX_PHI]], %[[LOAD1]] -; CHECK-UNORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], +; CHECK-UNORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], splat (float 8.000000e+00) ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED: %[[VEC_RDX:.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %[[VEC_FADD]]) ; CHECK-UNORDERED: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 87326d3a4a79266..c38c4b08cb5b571 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -138,7 +138,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 -; CHECK-VF8-NEXT: store <8 x i8> , ptr [[TMP21]], align 1 +; CHECK-VF8-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP21]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -232,7 +232,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 -; CHECK-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 +; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -295,7 +295,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 -; CHECK-VF8-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 +; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll index 375bbb74d3d48cd..cf1dd467647fecb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -83,7 +83,7 @@ for.end: define dso_local void @loop_fixed_width_i128(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: @loop_fixed_width_i128 ; CHECK: load <4 x i128>, ptr -; CHECK: add nsw <4 x i128> {{.*}}, +; CHECK: add nsw <4 x i128> {{.*}}, splat (i128 42) ; CHECK: store <4 x i128> {{.*}} ptr ; CHECK-NOT: vscale entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 1d150141e6251e6..a33234070867aaf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -95,7 +95,7 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 994f2f5e3776321..af1c63bc405a381 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -338,7 +338,7 @@ define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll index 187f50f2e76a480..ed5467258c71fcb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -27,7 +27,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 4 ; CHECK-NEXT: [[NEXT_ACTIVE_LANE_MASK]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[IDX]], i64 [[N2]]) -; CHECK-NEXT: [[NOT_ACTIVE_LANE_MASK:%.*]] = xor <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], +; CHECK-NEXT: [[NOT_ACTIVE_LANE_MASK:%.*]] = xor <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], splat (i1 true) ; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = extractelement <4 x i1> [[NOT_ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[FIRST_LANE_SET]], label %middle.block, label %vector.body @@ -63,7 +63,7 @@ define void @cond_uniform_load(ptr nocapture %dst, ptr nocapture readonly %src, ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK: [[COND_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{%.*}}, i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[COND_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[MASK:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[SRC_SPLAT]], i32 4, <4 x i1> [[MASK]], <4 x i32> poison) entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index f53fd5bc8d15a0b..c017c1f3b36ee8f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -20,7 +20,7 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index 57807604b37a871..1dd49ecf85b81ba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -49,8 +49,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison) ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison) -; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], -; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], splat (double 1.000000e+00) +; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], splat (double 1.000000e+00) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]]) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index c0933a9445f94da..fda9ef2cf6c2f31 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -20,7 +20,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) @@ -84,7 +84,7 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst, ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> , [[REVERSE]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 @@ -150,7 +150,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -209,16 +209,16 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -273,16 +273,16 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -343,7 +343,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) @@ -405,16 +405,16 @@ define void @test_stride_noninvar_4i32(ptr readonly %data, ptr noalias nocapture ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 32) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -515,7 +515,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -526,15 +526,15 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND4]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll index fec5921720fed9b..fde594d04836134 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll @@ -29,9 +29,9 @@ define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 % ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 -128) ; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> , <16 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> splat (i8 127), <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]] ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 @@ -124,9 +124,9 @@ define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], splat (i16 -32768) ; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> , <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP6]] ; CHECK-NEXT: store <8 x i16> [[TMP7]], ptr [[NEXT_GEP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -219,9 +219,9 @@ define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 -2147483648) ; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> , <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP6]] ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[NEXT_GEP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index 1371d80b00cadda..6011ccfd1561482 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -72,7 +72,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD1]]) ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -115,7 +115,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) @@ -167,14 +167,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 @@ -427,7 +427,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) @@ -480,7 +480,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index b5ae9700217eae5..832d4db53036fb6 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -251,7 +251,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] @@ -318,7 +318,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] @@ -586,7 +586,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i32 [[TMP0]] @@ -653,7 +653,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -722,7 +722,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -791,7 +791,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index b0cccf4d0a7bfad..60c3b52f2003a6b 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -45,7 +45,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> , <4 x float> [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> splat (float -0.000000e+00), <4 x float> [[TMP9]] ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index 3432773b4e1b3d3..d49639db3bbfcd0 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -111,7 +111,7 @@ define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -544,7 +544,7 @@ define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x float> poison) ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -749,8 +749,8 @@ define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 @@ -816,10 +816,10 @@ define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT]] @@ -886,18 +886,18 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28]] -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], splat (i8 10) ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> ), !alias.scope [[META31:![0-9]+]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31:![0-9]+]], !noalias [[META28]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> ), !alias.scope [[META31]], !noalias [[META28]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> ), !alias.scope [[META31]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i32 12 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll index 7db5bccd896b274..6f7b45beeb6e7a9 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -16,8 +16,8 @@ define arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index ffeca069dc0ea41..0bac16306730679 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -25,7 +25,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], splat (i16 1) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[D:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP11]], align 2 @@ -54,7 +54,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt ; CHECK-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX8]], align 2 ; CHECK-NEXT: [[ADD9]] = add nuw nsw i32 [[I_021]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD9]], 431 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -451,7 +451,7 @@ define void @fptrunc_not_allowed(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x float> [[TMP5]] to <4 x half> -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], splat (half 0xH4000) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[D:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <4 x half> [[TMP9]], ptr [[TMP11]], align 2 @@ -537,12 +537,12 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x ptr> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, <4 x ptr> [[TMP4]], <4 x i32> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -611,7 +611,7 @@ define i32 @i32_smin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -680,7 +680,7 @@ define i32 @i32_smax_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -749,7 +749,7 @@ define i32 @i32_umin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 4888f3d6383c859..55fd3530806d25c 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -33,13 +33,13 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP49:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP50:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <2 x i64> [[STEP_ADD4]], -; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <2 x i64> [[STEP_ADD5]], -; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <2 x i64> [[STEP_ADD6]], -; CHECK-NEXT: [[STEP_ADD8:%.*]] = add <2 x i64> [[STEP_ADD7]], -; CHECK-NEXT: [[STEP_ADD9:%.*]] = add <2 x i64> [[STEP_ADD8]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 @@ -66,14 +66,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[TMP24:%.*]] = zext <2 x i8> [[WIDE_LOAD29]] to <2 x i64> ; CHECK-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[WIDE_LOAD30]] to <2 x i64> ; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[WIDE_LOAD31]] to <2 x i64> -; CHECK-NEXT: [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD4]], -; CHECK-NEXT: [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD5]], -; CHECK-NEXT: [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD6]], -; CHECK-NEXT: [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD7]], -; CHECK-NEXT: [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD8]], -; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD9]], +; CHECK-NEXT: [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], splat (i64 1) +; CHECK-NEXT: [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD_2]], splat (i64 1) +; CHECK-NEXT: [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD_3]], splat (i64 1) +; CHECK-NEXT: [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD_4]], splat (i64 1) +; CHECK-NEXT: [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD_5]], splat (i64 1) +; CHECK-NEXT: [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD_6]], splat (i64 1) +; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD_7]], splat (i64 1) ; CHECK-NEXT: [[TMP35:%.*]] = shl <2 x i64> [[TMP19]], [[TMP27]] ; CHECK-NEXT: [[TMP36:%.*]] = shl <2 x i64> [[TMP20]], [[TMP28]] ; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i64> [[TMP21]], [[TMP29]] @@ -91,7 +91,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[TMP49]] = or <2 x i64> [[TMP41]], [[VEC_PHI16]] ; CHECK-NEXT: [[TMP50]] = or <2 x i64> [[TMP42]], [[VEC_PHI17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD9]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2) ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index a6797dea0836c24..ba9d49fc682c4c6 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -372,14 +372,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 @@ -427,7 +427,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> -; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], +; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] ; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 ; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP105]], ptr [[TMP107]], align 4 @@ -531,14 +531,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 ; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 @@ -586,7 +586,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], +; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] ; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP105]], ptr [[TMP107]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 6c8e3606c53f2ff..ab62addb94b9f50 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -486,8 +486,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] @@ -614,8 +614,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] @@ -736,10 +736,10 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) +; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 @@ -858,10 +858,10 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) +; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 @@ -981,10 +981,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> -; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> , <32 x i8> +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], splat (i8 -128) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128) +; FIXED-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) +; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP6]] ; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP8]], <32 x i8> [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 2789ab484e1b665..ad10ddc26fc7a58 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -71,8 +71,8 @@ define void @load_store_factor2_i32(ptr %p) { ; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], +; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> ; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 @@ -248,8 +248,8 @@ define void @load_store_factor2_i64(ptr %p) { ; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], +; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 @@ -372,9 +372,9 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -425,9 +425,9 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; FIXED-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -478,9 +478,9 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; SCALABLE-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -562,9 +562,9 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -615,9 +615,9 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -668,9 +668,9 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; SCALABLE-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -757,14 +757,14 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> @@ -849,14 +849,14 @@ define void @load_store_factor8(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; FIXED-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; FIXED-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; FIXED-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; FIXED-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; FIXED-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; FIXED-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; FIXED-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; FIXED-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; FIXED-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; FIXED-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; FIXED-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; FIXED-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; FIXED-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> @@ -941,14 +941,14 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; SCALABLE-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; SCALABLE-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; SCALABLE-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; SCALABLE-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; SCALABLE-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; SCALABLE-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; SCALABLE-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; SCALABLE-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; SCALABLE-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; SCALABLE-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; SCALABLE-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index ec50b0cac038293..07a5c53894cdece 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -261,7 +261,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 @@ -322,7 +322,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 @@ -382,7 +382,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index f63b1b73dc522b3..02b7ce3795ad520 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -9,8 +9,8 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48) +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -21,10 +21,10 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3) -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]] @@ -54,7 +54,7 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 7f4eb387a1ece69..95eb67b3823ee62 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: foo ; CHECK: LV: IC is 2 -; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, +; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8) ; CHECK: %{{.*}} = add {{.*}}, 16 ; Function Attrs: nofree norecurse nosync nounwind writeonly diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 8388d018b89e82a..d51426316b1e3c2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -21,7 +21,7 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -96,7 +96,7 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -168,8 +168,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -239,8 +239,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -310,8 +310,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -406,11 +406,11 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 35) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index fa13cd8f19ae562..d0e14e5ba44c7be 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -162,7 +162,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> , [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> splat (i1 true), [[TMP0]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 06a4f98d3dc7266..fadaf54255a77d7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -379,10 +379,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison) ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer @@ -393,7 +393,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: @@ -490,7 +490,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) ; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer @@ -498,7 +498,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1184,10 +1184,10 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]]) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] @@ -1196,7 +1196,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1294,14 +1294,14 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index e594fce5a7a7994..06d0eb3fc203d75 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -154,12 +154,12 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP: vector.ph: ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 @@ -1223,12 +1223,12 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP: vector.ph: ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x float> , float [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP1]] ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll index 99a7d1d34f26d8b..e7fdfbcf76caa40 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll @@ -25,8 +25,8 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> ), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> ), !alias.scope [[META3]], !noalias [[META0]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> splat (i1 true)), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> splat (i1 true)), !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 3477c8d879106b6..107ae504612968f 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -16,7 +16,7 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], splat (i64 8) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index 5e8f287217478b7..f828b74bc8bf699 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -9,13 +9,13 @@ define void @test(ptr %p, i40 %a) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], splat (i40 24) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], splat (i40 28) ; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -23,7 +23,7 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT3]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], splat (i32 9) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 9a1e613a736bf36..699b8487a065a51 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -22,7 +22,7 @@ define void @func_21() { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -59,7 +59,7 @@ define void @func_21() { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -78,7 +78,7 @@ define void @func_21() { ; CHECK-NEXT: store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index bb35806cea20a1a..64e57747bf2ae14 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -36,9 +36,9 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]] -; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP5]], <16 x ptr> [[TMP3]], i32 4, <16 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP5]], <16 x ptr> [[TMP3]], i32 4, <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 80) ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body %data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] } @@ -87,7 +87,7 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE: vector.body: ; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] -; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 2) ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FORCE: pred.store.if: @@ -103,7 +103,7 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE-NEXT: br label [[PRED_STORE_CONTINUE4]] ; FORCE: pred.store.continue2: ; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; FORCE-NEXT: br i1 [[TMP15]], label {{%.*}}, label [[VECTOR_BODY]] ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0418f93498bc203..3363aea39e73f6f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -30,7 +30,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <32 x i8> [[VEC_IND]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], splat (i8 32) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -61,7 +61,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], +; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], splat (i8 16) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -118,15 +118,15 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD_2]], splat (i64 3) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD_3]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = sitofp <2 x i64> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = sitofp <2 x i64> [[TMP9]] to <2 x float> ; CHECK-NEXT: [[TMP14:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> @@ -141,7 +141,7 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[TMP22]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP23]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index 6060a7318deb640..8b47aee6bf389b6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -15,10 +15,10 @@ define i64 @test_foldable_live_in_via_scev() { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], -; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], splat (i64 2) +; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -73,8 +73,8 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer @@ -88,8 +88,8 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], splat (i32 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 29e54fabad0c1bb..6f67d1e283e3951 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -426,8 +426,8 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x ptr> [[TMP20]], ptr [[TMP19]], i32 1 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <2 x ptr> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <2 x ptr> [[TMP21]], zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], -; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], splat (i1 true) +; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], splat (i1 true) ; CHECK-NEXT: [[TMP26]] = or <2 x i1> [[VEC_PHI]], [[TMP24]] ; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -782,10 +782,10 @@ define i64 @cost_assume(ptr %end, i64 %N) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[VEC_PHI]], -; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], -; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1) +; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1) +; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) @@ -851,13 +851,13 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], splat (i64 12) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11]] = and <4 x i32> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -932,7 +932,7 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 @@ -986,7 +986,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64> ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -994,7 +994,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1051,8 +1051,8 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 4c3c2e89cd204b0..c861aa8172b9b87 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP29]], i32 8, <4 x i1> [[TMP8]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 7b7b36e15cd5b2d..f6f77e274273e38 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -26,7 +26,7 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 @@ -69,7 +69,7 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0 entry: @@ -107,9 +107,9 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0 entry: @@ -149,9 +149,9 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]] -; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0 @@ -192,8 +192,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 @@ -231,10 +231,10 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 @@ -286,12 +286,12 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 ; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: @@ -355,11 +355,11 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0 ; @@ -401,8 +401,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 @@ -495,7 +495,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -530,7 +530,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body ; CHECK: middle.block: @@ -574,12 +574,12 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> , <4 x float> [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index f3875ccb41668c4..9092da6092595a5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -40,15 +40,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -1 -; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], splat (i32 196608) +; CHECK-NEXT: [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], splat (i32 16) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <16 x i32> [[TMP14]] to <16 x i16> ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP17]], i32 0 ; CHECK-NEXT: store <16 x i16> [[TMP15]], ptr [[TMP18]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -77,15 +77,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[OFFSET_IDX15]] to i32 ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 0 ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -1 -; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], -; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], splat (i32 196608) +; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP26:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16> ; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP26]], ptr [[TMP29]], align 2 ; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], +; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], splat (i32 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -145,92 +145,90 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <16 x i16> , [[TMP2]] - +; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer - +; CHECK-NEXT: [[TMP21:%.*]] = mul <16 x i16> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP21]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[DOTSPLAT3]] -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <16 x i16> [[STEP_ADD]], [[DOTSPLAT3]] -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <16 x i16> [[STEP_ADD4]], [[DOTSPLAT3]] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[TMP1]] +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]] +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = sub <16 x i16> [[STEP_ADD4]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP10:%.*]] = sub <16 x i16> [[STEP_ADD5]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 16 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48 -; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP15]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 +; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP9]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP11]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD_3]], [[TMP1]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST12:%.*]] = trunc i64 [[N_VEC]] to i16 -; CHECK-NEXT: [[IND_END13:%.*]] = mul i16 [[DOTCAST12]], [[TMP0]] +; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16 +; CHECK-NEXT: [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[L]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[L]], 8 -; CHECK-NEXT: [[N_VEC8:%.*]] = sub i64 [[L]], [[N_MOD_VF7]] -; CHECK-NEXT: [[DOTCAST10:%.*]] = trunc i64 [[N_VEC8]] to i16 -; CHECK-NEXT: [[IND_END11:%.*]] = mul i16 [[DOTCAST10]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT17]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT19:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT20:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT19]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i16> , [[DOTSPLAT20]] -; CHECK-NEXT: [[INDUCTION21:%.*]] = add <8 x i16> [[DOTSPLAT18]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP21]], i64 0 -; CHECK-NEXT: [[DOTSPLAT23:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT27]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[L]], 8 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]] +; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16 +; CHECK-NEXT: [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]] +; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> , [[DOTSPLAT16]] +; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX16:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND24:%.*]] = phi <8 x i16> [ [[INDUCTION21]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX16]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = sub <8 x i16> [[VEC_IND24]], [[BROADCAST_SPLAT28]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0 -; CHECK-NEXT: store <8 x i16> [[TMP23]], ptr [[TMP25]], align 2 -; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT26]] = add <8 x i16> [[VEC_IND24]], [[DOTSPLAT23]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2 +; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[L]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i16 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ADD]] = sub i16 [[P_09]], [[OFF]] ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[IV]] ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 @@ -240,6 +238,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK: exit: ; CHECK-NEXT: ret void ; + + entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index ca38d2747760076..0e511cfc9bffed6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -225,12 +225,12 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP1]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -303,16 +303,16 @@ define void @for_iv_trunc_optimized(ptr %dst) { ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], splat (i32 3) +; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 ; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 54dd9c870a17097..39fa87fbb569f65 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -32,9 +32,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 4.000000e+00) +; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 8.000000e+00) +; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.200000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 64 @@ -44,7 +44,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD3]], ptr [[TMP4]], align 4 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.600000e+01) ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTO_VEC: middle.block: @@ -207,9 +207,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 1.200000e+01) +; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 2.400000e+01) +; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 3.600000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64 @@ -219,7 +219,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], splat (double 4.800000e+01) ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AUTO_VEC: middle.block: @@ -371,9 +371,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], -; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], splat (float 3.360000e+02) +; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02) +; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32 ; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64 @@ -391,7 +391,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AUTO_VEC: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 6516b05ab4ede91..cc190fb826911e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -38,7 +38,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[TMP6:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64> ; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <16 x i64> [[TMP6]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP7]], i32 4, <16 x i1> [[TMP3]], <16 x float> poison) -; AVX512-NEXT: [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], +; AVX512-NEXT: [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP8]], ptr [[TMP10]], i32 4, <16 x i1> [[TMP3]]) @@ -64,7 +64,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP6:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64> ; FVW2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <2 x i64> [[TMP6]] ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP7]], i32 4, <2 x i1> [[TMP3]], <2 x float> poison) -; FVW2-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]] ; FVW2-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 ; FVW2-NEXT: call void @llvm.masked.store.v2f32.p0(<2 x float> [[TMP8]], ptr [[TMP10]], i32 4, <2 x i1> [[TMP3]]) @@ -124,15 +124,15 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX512: for.end: @@ -156,7 +156,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -174,7 +174,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FVW2: for.end: @@ -231,15 +231,15 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: for.end: @@ -263,7 +263,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -281,7 +281,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE2]] ; FVW2: pred.store.continue2: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FVW2: for.end: @@ -325,15 +325,15 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AVX512: for.end: @@ -357,7 +357,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -375,7 +375,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FVW2: for.end: @@ -418,15 +418,15 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: for.end: @@ -450,7 +450,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -468,7 +468,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FVW2: for.end: @@ -511,15 +511,15 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: for.end: @@ -543,7 +543,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -561,7 +561,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FVW2: for.end: @@ -652,11 +652,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP18]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> ), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope [[META15:![0-9]+]] ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> ), !alias.scope [[META11]], !noalias [[META13]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -691,12 +691,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD22:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD22]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> ), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> ), !alias.scope [[META20]], !noalias [[META22]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20]], !noalias [[META22]] ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 ; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll index 637b985b4562ed6..a90b4df3999f2a1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll @@ -18,8 +18,8 @@ define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10) +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP2]] @@ -97,13 +97,13 @@ define void @gep_use_outside_loop(ptr noalias %dst, ptr %src) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10) +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <4 x i1> [[TMP5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 8200e7df2a26023..5ba559af077ca6f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -31,9 +31,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV33:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0:![0-9]+]] ; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP3]], 3 -; CHECK-NEXT: store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] @@ -44,9 +44,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP4]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP5]], 1 -; CHECK-NEXT: store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] @@ -77,9 +77,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3 -; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 0c2ec268ad20708..2dac60a0fcfcba1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -80,8 +80,8 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 ; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 -; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], -; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], +; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) +; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], splat (double 4.200000e+01) ; SSE-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] ; SSE-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]] ; SSE-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP8]], <2 x double> [[VEC_PHI]] @@ -139,10 +139,10 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 ; AVX-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 ; AVX-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP11]], align 8 -; AVX-NEXT: [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], -; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], -; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], +; AVX-NEXT: [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], splat (double 4.200000e+01) ; AVX-NEXT: [[TMP16:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] ; AVX-NEXT: [[TMP17:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]] ; AVX-NEXT: [[TMP18:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index c0098eb533c00d6..f415a81d3b7c73e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -18,23 +18,23 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], -; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], -; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], splat (i32 8) +; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], splat (i32 8) +; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], splat (i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD2]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], splat (i32 8) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -88,24 +88,24 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 -; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] -; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]] +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META7]] ; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) +; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -158,18 +158,18 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], splat (i16 8) ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10) +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], splat (i16 8) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -273,7 +273,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> ; CHECK-NEXT: [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], splat (i32 1) ; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8> ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0 ; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META16:![0-9]+]], !noalias [[META13]] @@ -382,11 +382,11 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> ; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -441,18 +441,18 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], -; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]] ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]] @@ -471,7 +471,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -571,7 +571,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -665,7 +665,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -739,7 +739,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -816,7 +816,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index ad0068dc3f6be7c..fd89c8b01934156 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -485,7 +485,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> , <4 x i32> poison), !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> @@ -495,7 +495,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -603,7 +603,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -725,7 +725,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll index 717dbe359f1050d..71b3f296e6dfbfd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -7,8 +7,8 @@ target triple = "x86_64-apple-macos" ; that store into the last store (by creating an interleaved store group). This ; means the loaded %l2 will have incorrect value. define void @avoid_sinking_store_across_load(ptr %arr) { -; CHECK-LABEL: define void @avoid_sinking_store_across_load -; CHECK-SAME: (ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define void @avoid_sinking_store_across_load( +; CHECK-SAME: ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -24,22 +24,22 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]] -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 -; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]] +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[STRIDED_VEC]], splat (i32 25) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP7]], <4 x ptr> [[TMP6]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP11]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> ) +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 12) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 8e934dcfae55100..1433e48690bc60b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -31,9 +31,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] @@ -57,7 +57,7 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP18]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI11]] ; CHECK-NEXT: [[TMP19]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], splat (i64 4) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index ecae00807f4e722..e945ad40039af89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -17,18 +17,18 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], -; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]] ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]] @@ -47,7 +47,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -181,8 +181,8 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP10]], i32 4, <4 x i1> [[TMP8]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index d2600cd59b6dfec..11383aad4e90bd1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -39,7 +39,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) @@ -102,10 +102,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 @@ -185,10 +185,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 @@ -227,7 +227,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP29]], align 4 -; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) @@ -314,7 +314,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) @@ -377,10 +377,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 @@ -460,10 +460,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 @@ -502,7 +502,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP28]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP29]], align 4 -; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP31]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) @@ -598,7 +598,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x float> poison) @@ -663,10 +663,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 @@ -751,10 +751,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 @@ -797,7 +797,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP31]] ; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4 -; AVX512-NEXT: [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP31]] ; AVX512-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[TMP35]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP36]], i32 4, <8 x i1> [[TMP34]], <8 x float> poison) @@ -904,10 +904,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] ; AVX-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] ; AVX-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] -; AVX-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], -; AVX-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], -; AVX-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], -; AVX-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], +; AVX-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) +; AVX-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 ; AVX-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 @@ -992,10 +992,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META11]] ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META11]] ; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META11]] -; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], +; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 @@ -1137,9 +1137,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <8 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> , <8 x i32> poison), !alias.scope [[META21:![0-9]+]] -; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], -; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !alias.scope [[META21:![0-9]+]] +; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) +; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], splat (i64 1) ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], <8 x i64> [[TMP2]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> poison), !alias.scope [[META24:![0-9]+]] ; AVX512-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double> @@ -1147,7 +1147,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope [[META26:![0-9]+]], !noalias [[META28:![0-9]+]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128) ; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 ; AVX512-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; AVX512: middle.block: @@ -1306,10 +1306,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP22]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META21]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], -; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], -; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], -; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], +; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3 @@ -1418,10 +1418,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP22]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], -; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], -; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], -; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], +; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7 @@ -1527,18 +1527,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1552,10 +1552,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1565,10 +1565,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1627,18 +1627,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1652,10 +1652,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1665,10 +1665,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1727,18 +1727,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 ; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 -; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], -; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], -; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], +; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer -; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], -; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], -; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], -; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], +; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) +; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) +; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) +; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 @@ -1752,10 +1752,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], -; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], -; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], -; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], +; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) +; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) +; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) +; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) ; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer @@ -1765,10 +1765,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 ; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] @@ -1872,18 +1872,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1897,10 +1897,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1910,10 +1910,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1972,18 +1972,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1997,10 +1997,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -2010,10 +2010,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -2072,18 +2072,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 ; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 -; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], -; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], -; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], +; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer -; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], -; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], -; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], -; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], +; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) +; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) +; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) +; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 @@ -2097,10 +2097,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], -; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], -; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], -; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], +; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) +; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) +; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) +; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) ; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer @@ -2110,10 +2110,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 ; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 3226f72d51d2e4b..a12aed7239c9df3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -21,12 +21,12 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> , <64 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 @@ -60,12 +60,12 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], +; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202) ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison) ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> +; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 @@ -121,12 +121,12 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> , <64 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 @@ -160,12 +160,12 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], +; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202) ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison) ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> +; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 @@ -225,12 +225,12 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <64 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <64 x i32> [[TMP1]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0(<64 x ptr> [[TMP2]], i32 4, <64 x i1> , <64 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0(<64 x ptr> [[TMP2]], i32 4, <64 x i1> splat (i1 true), <64 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], splat (i32 64) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -264,12 +264,12 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = mul nsw <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i32> [[TMP1]] -; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison) +; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; AUTOVF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AUTOVF: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 7aa9d54c85472a5..6480c0ab1099d70 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -27,7 +27,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -35,15 +35,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body @@ -56,7 +56,7 @@ ; AVX: %[[VecInd:.*]] = phi <8 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> %[[VecInd]] ; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> ) +; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> splat (i1 true)) ; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> ; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]] ; AVX: br label %[[InnerLoop:.+]] @@ -64,14 +64,14 @@ ; AVX: [[InnerLoop]]: ; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]] -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> %[[InnerPhi]], -; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], +; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> splat (i1 true)) +; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], splat (i64 1) +; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], splat (i64 8) ; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0 ; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; AVX: [[ForInc]]: -; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], +; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8 ; AVX: br i1 true, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll index 5da01a24631bcee..bb7fe4d4f1e5690 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll @@ -21,8 +21,8 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE15:.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE15]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE15]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] ; CHECK: [[PRED_SDIV_IF]]: @@ -76,8 +76,8 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK: [[PRED_SDIV_CONTINUE15]]: ; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP20]], %[[PRED_SDIV_IF14]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i16> zeroinitializer, <4 x i16> [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], +; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) ; CHECK-NEXT: [[TMP24]] = or <4 x i1> [[VEC_PHI]], [[TMP22]] ; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI1]], [[TMP23]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index fad9a87e5a01d7d..ee8374f952c7a39 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -26,12 +26,12 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope !3 +; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -53,7 +53,7 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[TMP12]] = add nuw nsw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], 80 ; CHECK-NEXT: [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index c05c94aa881218b..d907c76efd255bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -29,7 +29,7 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -87,7 +87,7 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index c139afae9f23094..c91ead00a950d0d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -10,7 +10,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -33,7 +33,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index fa69fefba515f32..8321cbcb5a1d80e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -32,27 +32,27 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> ), !tbaa [[TBAA10:![0-9]+]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll index cc820f2a8f2a05f..a5eb42ae184a7e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll @@ -15,7 +15,7 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], 2008 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 6229, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -37,9 +37,9 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.continue3: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index b88f413ff1b81c5..df9ba2194139b81 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -75,10 +75,10 @@ define void @test(ptr %p) { ; VEC-NEXT: store i64 0, ptr [[TMP24]], align 8 ; VEC-NEXT: store i64 0, ptr [[TMP25]], align 8 ; VEC-NEXT: store i64 0, ptr [[TMP26]], align 8 -; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], +; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; VEC-NEXT: [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64> ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VEC: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 3f38abc75a5837a..73bdae71654f12e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -25,8 +25,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1 @@ -34,13 +34,13 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]]) +; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] @@ -48,7 +48,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -57,7 +57,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -101,7 +101,8 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index dc9c3e27fe08f2c..baacad482f96267 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -27,10 +27,10 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]] -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -83,14 +83,14 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -166,12 +166,12 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]] -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -225,18 +225,18 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], -; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) +; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -350,26 +350,26 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]] -; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]]) +; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -468,17 +468,17 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer -; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], -; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], +; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]]) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -544,28 +544,28 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer -; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], -; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], -; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], -; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], +; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) ; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -706,16 +706,16 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) ; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) +; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14) +; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14) +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15) ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]] ; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]] ; FORCED-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]] @@ -724,14 +724,14 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]] ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]] ; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]] -; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], -; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], +; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], splat (i1 true) +; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], splat (i1 true) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -857,20 +857,20 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]] -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -965,16 +965,16 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 ; COST-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]] -; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], +; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -1041,26 +1041,26 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; FORCED-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]] -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]] ; FORCED-NEXT: [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]] -; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], -; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], +; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], splat (i1 true) +; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP25]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -1194,18 +1194,18 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]] ; FORCED-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]] -; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], -; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], +; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) +; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true) ; FORCED-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; FORCED-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]] -; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], +; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) ; FORCED-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]] @@ -1214,12 +1214,12 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]]) ; FORCED-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]]) ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]] ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1348,24 +1348,24 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3) +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11) +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99) +; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213) +; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213) +; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238) +; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238) +; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513) +; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513) +; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791) +; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791) +; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899) +; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899) ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] ; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]] @@ -1382,8 +1382,8 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]] ; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]] ; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 9cd990b9878ce69..95c74d19dd2dbc7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -124,8 +124,8 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0 @@ -191,8 +191,8 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0 @@ -265,8 +265,8 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 @@ -348,8 +348,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 14e29e0d0518a17..8d56c3386a3b1bf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -18,8 +18,8 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]] ; CHECK: [[PRED_UREM_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index ce460f4fe35425f..414460872ba13bc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -15,23 +15,253 @@ target triple = "x86_64-apple-macosx10.11.0" ; Function Attrs: norecurse nounwind ssp uwtable define void @_Z3fn1v() #0 { ; CHECK-LABEL: @_Z3fn1v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @c, align 4 +; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[TMP0]], 8 +; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @a, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @b, align 8 +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[TMP2]], 4063299859190 +; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i64 [[MUL]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[ITER_CHECK27:%.*]], label [[ITER_CHECK:%.*]] +; CHECK: iter.check: +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -9 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP6]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 8, [[TMP7]] +; CHECK-NEXT: [[IND_END4:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> , [[VEC_IND]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP13]], i32 16, <16 x i1> ) -; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP13]], i32 16, <16 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], splat (i64 1) ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP16]], i32 8, <16 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP16]], i32 8, <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], -; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32) +; CHECK-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP63]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT99:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END9:%.*]] = add i64 8, [[TMP64]] +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[TMP6]], 8 +; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF6]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[N_VEC7]], 2 +; CHECK-NEXT: [[IND_END8:%.*]] = add i64 8, [[TMP17]] +; CHECK-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC7]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT17]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION19:%.*]] = add <8 x i64> [[DOTSPLAT18]], +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION19]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND15]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND15]] +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP20]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP21]], i32 16, <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP22:%.*]] = or disjoint <8 x i64> [[VEC_IND20]], splat (i64 1) +; CHECK-NEXT: [[TMP23:%.*]] = add nsw <8 x i64> [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP24]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16) +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[CMP_N23]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: iter.check27: +; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK29:%.*]] +; CHECK: vector.main.loop.iter.check29: +; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH47:%.*]], label [[VECTOR_PH30:%.*]] +; CHECK: vector.ph30: +; CHECK-NEXT: [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16 +; CHECK-NEXT: [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[IND_END41:%.*]] = add i64 8, [[TMP29]] +; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY33:%.*]] +; CHECK: vector.body33: +; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] +; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]]) +; CHECK-NEXT: [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) +; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[TMP38:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) +; CHECK-NEXT: [[TMP39:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP39]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP40]], i32 8, <16 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK24:%.*]], label [[VECTOR_BODY33]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block24: +; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] +; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK48:%.*]] +; CHECK: vec.epilog.iter.check48: +; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] +; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH47]] +; CHECK: vec.epilog.ph47: +; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8 +; CHECK-NEXT: [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]] +; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2 +; CHECK-NEXT: [[IND_END54:%.*]] = add i64 8, [[TMP43]] +; CHECK-NEXT: [[IND_END57:%.*]] = mul i64 [[N_VEC53]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0 +; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], +; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0 +; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY60:%.*]] +; CHECK: vec.epilog.vector.body60: +; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] +; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0 +; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) +; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]]) +; CHECK-NEXT: [[TMP52:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) +; CHECK-NEXT: [[TMP53:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP53]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP54]], i32 8, <8 x i1> [[BROADCAST_SPLAT73]]) +; CHECK-NEXT: [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) +; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY60]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: vec.epilog.middle.block45: +; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]] +; CHECK: vec.epilog.scalar.ph46: +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] +; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL59]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]] +; CHECK-NEXT: [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]] +; CHECK-NEXT: [[TMP57:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV70]] +; CHECK-NEXT: [[ARRAYDECAY_US_US_US:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP57]], i64 0 +; CHECK-NEXT: br i1 [[TOBOOL6]], label [[FOR_BODY5_US_US_US_PREHEADER:%.*]], label [[FOR_BODY5_US_US48_PREHEADER:%.*]] +; CHECK: for.body5.us.us48.preheader: +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US_US_US]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT66:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1 +; CHECK-NEXT: [[TMP58:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT66]] +; CHECK-NEXT: [[ARRAYDECAY_US_US55_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP58]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US_US55_1]], align 8 +; CHECK-NEXT: br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] +; CHECK: for.body5.us.us.us.preheader: +; CHECK-NEXT: store i32 7, ptr [[ARRAYDECAY_US_US_US]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT73:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1 +; CHECK-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT73]] +; CHECK-NEXT: [[ARRAYDECAY_US_US_US_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP59]], i64 0 +; CHECK-NEXT: store i32 7, ptr [[ARRAYDECAY_US_US_US_1]], align 8 +; CHECK-NEXT: br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] +; CHECK: for.cond.cleanup4.us-lcssa.us.us: +; CHECK-NEXT: [[INDVARS_IV_NEXT79]] = add nuw nsw i64 [[INDVARS_IV78]], 2 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT79]], [[TMP3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT71]] = add nuw nsw i64 [[INDVARS_IV70]], 2 +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit99: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV95:%.*]] = phi i64 [ [[INDVARS_IV_NEXT96:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV87:%.*]] = phi i64 [ [[INDVARS_IV_NEXT88:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[TMP60:%.*]] = sub nsw i64 8, [[INDVARS_IV95]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV95]] +; CHECK-NEXT: [[TMP61:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV87]] +; CHECK-NEXT: [[ARRAYDECAY_US31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP61]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US31]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT90:%.*]] = or disjoint i64 [[INDVARS_IV87]], 1 +; CHECK-NEXT: [[TMP62:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV_NEXT90]] +; CHECK-NEXT: [[ARRAYDECAY_US31_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP62]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US31_1]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT96]] = add nuw nsw i64 [[INDVARS_IV95]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT96]], [[TMP3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT88]] = add nuw nsw i64 [[INDVARS_IV87]], 2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], !llvm.loop [[LOOP7:![0-9]+]] ; entry: %0 = load i32, ptr @c, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index dc474fbf67ce8b1..99aaecf9a073c45 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -118,7 +118,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -410,7 +410,7 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], splat (i32 7) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 @@ -457,7 +457,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], splat (i64 257) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 4e9b97ef928b13f..9c42c3769ec177a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -164,10 +164,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], -; CHECK-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], +; CHECK-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) +; CHECK-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) +; CHECK-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) +; CHECK-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) ; CHECK-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] ; CHECK-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] ; CHECK-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] @@ -356,10 +356,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] ; MAX-BW-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] ; MAX-BW-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] -; MAX-BW-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], -; MAX-BW-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], -; MAX-BW-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], -; MAX-BW-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], +; MAX-BW-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) +; MAX-BW-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) +; MAX-BW-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) +; MAX-BW-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) ; MAX-BW-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] ; MAX-BW-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] ; MAX-BW-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 68c0b8cbc2c4a93..de906598775eb04 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -17,7 +17,7 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison) @@ -85,7 +85,7 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index fa0d529116d1091..45594b03353361a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -291,8 +291,8 @@ define void @uniform_copy(ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope !12 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope !15, !noalias !12 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META12:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] @@ -349,9 +349,9 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -376,10 +376,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[TMP15]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT11]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD_2]], splat (i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD_3]], splat (i64 8) ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i8> ; CHECK-NEXT: [[TMP21:%.*]] = trunc <4 x i64> [[TMP17]] to <4 x i8> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i64> [[TMP18]] to <4 x i8> @@ -388,10 +388,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP25:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT8]], [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT10]], [[TMP22]] ; CHECK-NEXT: [[TMP27:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT12]], [[TMP23]] -; CHECK-NEXT: [[TMP28:%.*]] = and <4 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP29:%.*]] = and <4 x i8> [[TMP25]], -; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], -; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP27]], +; CHECK-NEXT: [[TMP28:%.*]] = and <4 x i8> [[TMP24]], splat (i8 1) +; CHECK-NEXT: [[TMP29:%.*]] = and <4 x i8> [[TMP25]], splat (i8 1) +; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], splat (i8 1) +; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP27]], splat (i8 1) ; CHECK-NEXT: [[TMP32:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32> @@ -401,7 +401,7 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP38]] = add <4 x i32> [[VEC_PHI5]], [[TMP34]] ; CHECK-NEXT: [[TMP39]] = add <4 x i32> [[VEC_PHI6]], [[TMP35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 094f80e287f0b29..9feeee004025c95 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -114,7 +114,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 19) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index c6a87fa21fb0cc1..3af3ad9ab3cf5ba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -16,7 +16,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000) ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 @@ -91,7 +91,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll index 5ec7c1d45f8cf14..85d6c801dee6903 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll @@ -30,7 +30,7 @@ define void @test(ptr %A) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], splat (i32 2) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i32 0 ; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 12f95e0a0a7dc8a..6f682c2d60f3f80 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -14,9 +14,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 @@ -27,14 +27,14 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], -; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], +; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128) +; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], splat (i64 128) +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], splat (i64 128) +; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128) ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] @@ -47,7 +47,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr [[TMP34]], i32 4, <4 x i1> [[TMP18]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr [[TMP35]], i32 4, <4 x i1> [[TMP19]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -120,10 +120,10 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128) +; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], splat (i64 128) +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], splat (i64 128) +; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index d25b03943fa217a..4174aa4e897bca8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -40,7 +40,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -124,7 +124,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -148,7 +148,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.body: @@ -213,7 +213,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -297,7 +297,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -402,7 +402,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -486,7 +486,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -521,7 +521,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -606,7 +606,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], splat (i32 3) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -690,7 +690,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -725,7 +725,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -792,7 +792,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1 @@ -828,7 +828,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -906,7 +906,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -990,7 +990,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1088,7 +1088,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -1169,7 +1169,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -1397,7 +1397,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1425,7 +1425,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -1493,7 +1493,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -1574,7 +1574,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -1802,7 +1802,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1818,7 +1818,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.load.if: @@ -1899,7 +1899,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; ENABLED_MASKED_STRIDED: pred.load.continue14: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; ENABLED_MASKED_STRIDED: pred.load.if15: @@ -2127,7 +2127,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.continue60: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -2213,7 +2213,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -2294,7 +2294,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] ; DISABLED_MASKED_STRIDED: pred.load.continue16: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if17: @@ -2522,7 +2522,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.continue62: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -2559,7 +2559,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -2647,7 +2647,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -2728,7 +2728,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -2956,7 +2956,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index b850dc3ecef85d1..bd54f87b5048961 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -27,7 +27,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i64 1 @@ -46,7 +46,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], splat (i64 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP15]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP15]], i64 1 @@ -64,7 +64,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -141,7 +141,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 2) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: @@ -180,7 +180,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP19]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if8: @@ -218,7 +218,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] ; DISABLED_MASKED_STRIDED: pred.store.continue15: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP37]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end.loopexit: @@ -247,7 +247,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDEX]], 3 -; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.+]], i64 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.*]], i64 [[TMP2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP3]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> @@ -314,7 +314,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: @@ -352,7 +352,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -367,7 +367,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if: @@ -405,7 +405,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; ENABLED_MASKED_STRIDED: pred.store.continue6: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll index 6ed398bb5bea50d..5e4ba0b6d33d955 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" ;CHECK-NEXT: %index = phi i32 ;CHECK-NEXT: %[[VECIND:.+]] = phi <8 x i32> [ ;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}} -;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], +;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], splat (i32 1) ;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0 ;CHECK-NEXT: br i1 %[[M]], label %pred.store.if, label %pred.store.continue ;CHECK-NOT: %{{.+}} = load <16 x i8>, ptr %{{.*}}, align 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index eee1b6f35d1b706..126bbf9afc34e8e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -227,7 +227,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 777) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: @@ -253,7 +253,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -301,7 +301,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE14:%.*]] ] ; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE14]] ] ; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE14]] ] -; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], +; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777) ; SINK-GATHER-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 0 ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-GATHER: pred.udiv.if: @@ -393,7 +393,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-GATHER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index 0ca3e05d77c044c..c81f48ff62afc32 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -5,8 +5,8 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: vector.body: ; CHECK: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr {{.*}}, align 4 ; CHECK: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr {{.*}}, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+02) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll index 8b06d9f61e5da10..4c95584ff253a48 100644 --- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -115,7 +115,7 @@ define i64 @invar_cond(i1 %c) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -174,8 +174,8 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> , <4 x i64> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll index f2111081ffca9c1..8397b1ed88dcddb 100644 --- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll @@ -17,9 +17,9 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 @@ -33,7 +33,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK-NEXT: store i32 4, ptr [[TMP7]], align 4 ; CHECK-NEXT: store i32 4, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -41,7 +41,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 782efb7acc644ff..deca8ea161cc4cf 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -45,7 +45,7 @@ define void @redundant_iv_cast(ptr %dst) { ; VF4: vector.body: ; VF4: [[VEC_IND:%.+]] = phi <4 x i16> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ] ; VF4: store <4 x i16> [[VEC_IND]] -; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; ; IC2-LABEL: @redundant_iv_cast ; IC2: vector.body: @@ -120,7 +120,7 @@ define void @cast_induction_tail_folding(ptr %A) { ; VF4-LABEL: @cast_induction_tail_folding( ; VF4: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ] ; VF4-NEXT: [[VEC_IND:%.+]] = phi <4 x i32> [ , %vector.ph ] -; VF4-NEXT: = icmp ule <4 x i32> [[VEC_IND]], +; VF4-NEXT: = icmp ule <4 x i32> [[VEC_IND]], splat (i32 2) ; VF4-NEXT: = sext <4 x i32> [[VEC_IND]] to <4 x i64> ; IC2-LABEL: @cast_induction_tail_folding( diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index aea383226c71e65..c745b4f74786c9a 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -49,7 +49,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDUCTION_IV_LCSSA1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 3bf2591391121ea..66aceab9fb27c80 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -17,21 +17,21 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK: for.cond5.preheader1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> ), !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP1]], i32 4, <4 x i1> ), !dbg [[DBG22]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 2, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP2]], i32 4, <4 x i1> ), !dbg [[DBG22]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 2), <4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 3, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP3]], i32 4, <4 x i1> ), !dbg [[DBG22]] -; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], , !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], , !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 3), <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] +; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1), !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 5), !dbg [[DBG25:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP32]], label [[FOR_COND5_PREHEADER1]], !dbg [[DBG26]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]], !dbg [[DBG21]] ; CHECK: scalar.ph: @@ -45,19 +45,19 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[L_022:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INC10:%.*]], [[FOR_COND5_PREHEADER]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[H]], i64 [[L_022]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG31:![0-9]+]] ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG33]] +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG31]] ; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG33]] +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG31]] ; CHECK-NEXT: store i32 3, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG22]] ; CHECK-NEXT: [[INC10]] = add nuw nsw i64 [[L_022]], 1, !dbg [[DBG24]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC10]], 5, !dbg [[DBG25]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_COND5_PREHEADER]], !dbg [[DBG26]] ; CHECK: for.cond.cleanup3: -; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27:![0-9]+]] +; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG32:![0-9]+]] ; CHECK-NEXT: #dbg_value(i64 [[INC13]], [[META11]], !DIExpression(), [[META20]]) -; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28:![0-9]+]] +; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG33:![0-9]+]] ; CHECK-NEXT: br i1 [[EXITCOND24_NOT]], label [[EXIT]], label [[FOR_COND1_PREHEADER]], !dbg [[DBG21]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]] @@ -160,13 +160,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; CHECK: [[DBG24]] = !DILocation(line: 11, column: 32, scope: [[META19]]) ; CHECK: [[DBG25]] = !DILocation(line: 11, column: 26, scope: [[META19]]) ; CHECK: [[DBG26]] = !DILocation(line: 11, column: 5, scope: [[META15]]) -; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[DBG21]], [[META30:![0-9]+]], [[META31:![0-9]+]], [[META32:![0-9]+]]} -; CHECK: [[META30]] = !DILocation(line: 13, column: 13, scope: [[META12]]) -; CHECK: [[META31]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META32]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[DBG33]] = !DILocation(line: 13, column: 2, scope: [[META23]]) -; CHECK: [[DBG27]] = !DILocation(line: 10, column: 30, scope: [[META16]]) -; CHECK: [[DBG28]] = !DILocation(line: 10, column: 24, scope: [[META16]]) -; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META30]], [[META31]]} +; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[DBG21]], [[META28:![0-9]+]], [[META29:![0-9]+]], [[META30:![0-9]+]]} +; CHECK: [[META28]] = !DILocation(line: 13, column: 13, scope: [[META12]]) +; CHECK: [[META29]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META30]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[DBG31]] = !DILocation(line: 13, column: 2, scope: [[META23]]) +; CHECK: [[DBG32]] = !DILocation(line: 10, column: 30, scope: [[META16]]) +; CHECK: [[DBG33]] = !DILocation(line: 10, column: 24, scope: [[META16]]) +; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META28]], [[META29]]} ; CHECK: [[DBG35]] = !DILocation(line: 14, column: 1, scope: [[DBG4]]) ;. diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 8b5dd4211d85051..c2edb54f6f3b57b 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -46,16 +46,16 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP2:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP4:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2]] = add i64 [[TMP1]], [[R]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP4]] = add i64 [[TMP2]], [[R]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ [[TMP4]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[TMP5]] ; entry: br label %for.body @@ -155,8 +155,8 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META6]], !noalias [[META9]] -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP4]], align 4, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll index cc305781c62417d..e28dd77eadd5295 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll @@ -61,8 +61,8 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst) ; CHECK-LABEL: define i32 @test_debug_loc_on_branch_in_loop( ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4 -; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], -; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], , !dbg [[LOC3:!.+]] +; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], splat (i32 10) +; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], splat (i1 true), !dbg [[LOC3:!.+]] ; CHECK-NEXT: [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC3]] ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue ; CHECK-NOT: !dbg @@ -101,8 +101,8 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n ; CHECK-LABEL: define i32 @test_different_debug_loc_on_replicate_recipe( ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4 -; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], -; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], , !dbg [[LOC4:!.+]] +; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], splat (i32 10) +; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], splat (i1 true), !dbg [[LOC4:!.+]] ; CHECK-NEXT: [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC4]] ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue ; CHECK-NOT: !dbg @@ -146,7 +146,7 @@ define void @test_misc(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg !35 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %b, i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> zeroinitializer, !dbg [[LOC6:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0, !dbg [[LOC7:![0-9]+]] ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP6]], align 4, !dbg [[LOC7]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274174cceeb1c2a..1e37b7be662dfd1 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -19,9 +19,9 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP6]], align 1 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP7]], align 1 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP8]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP6]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 76ca2507b914cfd..53686ee76cbbdb3 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -21,7 +21,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize { ; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]] -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -86,7 +86,7 @@ define dso_local void @assumeAlignedTC(ptr noalias nocapture %A, i32 %p, i32 %q) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -198,7 +198,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 5f2e91b8d1f32d0..8afde74cf277dc4 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -223,10 +223,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -261,10 +261,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], splat (i32 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -359,7 +359,7 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[TMP19]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP20]], +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true) ; CHECK-NEXT: [[RDX_SELECT_CMP]] = or <4 x i1> [[VEC_PHI]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -414,7 +414,7 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP39]], i64 [[TMP36]], i32 2 ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i64> [[TMP40]], i64 [[TMP37]], i32 3 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq <4 x i64> [[TMP41]], [[BROADCAST_SPLAT19]] -; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP42]], +; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP42]], splat (i1 true) ; CHECK-NEXT: [[TMP43]] = or <4 x i1> [[VEC_PHI12]], [[TMP46]] ; CHECK-NEXT: [[INDEX_NEXT20]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT20]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index f78654176e23eba..3c1dd1bd8b6d139 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -201,7 +201,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 65535) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 @@ -228,7 +228,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ [[TMP12]], [[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], splat (i32 65535) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i32 [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2 @@ -327,7 +327,7 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> , float [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> splat (float 1.000000e+00), float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll index 756b0ab9612b8b8..7a92d1a1c9ea5f4 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll @@ -30,7 +30,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -54,7 +54,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], splat (i8 2) ; CHECK-NEXT: store <4 x i8> [[TMP15]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] diff --git a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll index 47636b2c66d296a..f648de1358998e8 100644 --- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll +++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll @@ -13,7 +13,7 @@ define void @inv_store_last_lane(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -74,7 +74,7 @@ define float @ret_last_lane(ptr noalias nocapture %a, ptr noalias nocapture read ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index b0ece3980cdf24d..7aedb218e135205 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -170,7 +170,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP4]], splat (i16 10) ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 @@ -314,7 +314,7 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], splat (i16 10) ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]] ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -365,7 +365,7 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr % ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP6]], splat (i16 10) ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 @@ -413,7 +413,7 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr) { ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> , [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP4]] ; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -480,11 +480,11 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: @@ -520,11 +520,11 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index eda92aae095ddfc..0f206bfb92fa01a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -445,9 +445,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP9]], align 4 @@ -535,10 +535,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP10]], align 4 @@ -696,8 +696,8 @@ define i16 @multiple_exit(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -772,8 +772,8 @@ define i16 @multiple_exit2(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -841,8 +841,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213) +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> splat (i32 22) ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -922,9 +922,9 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], splat (i32 2) +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], splat (i32 99) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213) ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]] ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -1045,7 +1045,7 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8 @@ -1114,7 +1114,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP3]] ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -1125,7 +1125,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 20acc1053532363..c37d2c999d97aa5 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -230,11 +230,11 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias % ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], splat (i32 10) +; CHECK-NEXT: [[TMP4]] = xor <4 x i32> splat (i32 12), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 255) ; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 @@ -405,11 +405,11 @@ define void @hoist_previous_value_and_operand(ptr %dst, i64 %mask) { ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4]] = trunc <4 x i64> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[TMP5]], splat (i32 3) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index a90594085d3cfe4..6c94f5bfc836a77 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -901,9 +901,9 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -996,7 +996,7 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: @@ -1240,7 +1240,7 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1297,7 +1297,7 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1351,10 +1351,10 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP0]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1423,7 +1423,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: middle.block: @@ -1978,8 +1978,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> , ptr [[TMP17]], align 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> , ptr [[TMP18]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP17]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2 @@ -2120,7 +2120,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; SINK-AFTER-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; SINK-AFTER-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP9]], align 4 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2 @@ -2221,8 +2221,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] @@ -2339,7 +2339,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2 ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], splat (i32 2) ; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP7]] ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] @@ -2510,26 +2510,26 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], -; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5) +; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 ; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2618,16 +2618,16 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; SINK-AFTER-NEXT: [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], +; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5) ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], +; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]] ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0 ; SINK-AFTER-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 ; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3001,7 +3001,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE30]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE30]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE30]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 @@ -3148,7 +3148,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3357,7 +3357,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER: pred.store.continue13: ; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3421,10 +3421,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] @@ -3433,7 +3433,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3521,14 +3521,14 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; SINK-AFTER-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] ; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3592,11 +3592,11 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3659,10 +3659,10 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], -; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 ; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; SINK-AFTER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 21aa9dc97187d75..89ac22b054e24d7 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -539,7 +539,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -584,13 +584,13 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC4_INTERL2: middle.block: @@ -688,7 +688,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: @@ -783,14 +783,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 ; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] -; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]] ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP9]], ptr [[TMP10]], align 4 ; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00) ; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -843,7 +843,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 @@ -862,8 +862,8 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4 ; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST]] ; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST]] -; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], -; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.500000e+00) ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]] ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]] ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -875,7 +875,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: store <4 x float> [[TMP10]], ptr [[TMP16]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[TMP11]], ptr [[TMP17]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -4.000000e+00) ; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT5]] ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -1023,14 +1023,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP10]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -1121,7 +1121,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -1163,13 +1163,13 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC4_INTERL2: middle.block: @@ -1264,7 +1264,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index 4b771bf57f977aa..c71ff2b9fa4c478 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; ; CHECK-NOT: Found uniform instruction: %cond = icmp slt i64 %i.next, %n ; CHECK: vector.body -; CHECK: %[[I:.+]] = add nuw nsw <4 x i64> %vec.ind, +; CHECK: %[[I:.+]] = add nuw nsw <4 x i64> %vec.ind, splat (i64 1) ; CHECK: icmp slt <4 x i64> %[[I]], %broadcast.splat ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 8b0c99b353c8b75..492eb091175e222 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -33,11 +33,11 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3) +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -140,16 +140,16 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19) +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11) ; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18) ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index ecb57c539a40ef2..b8597b85f79bea8 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -46,18 +46,18 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope !5, !noalias !8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope !15 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META15]] +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], splat (i32 24) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], splat (i32 25) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], splat (i32 26) +; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -108,19 +108,19 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; CHECK-NEXT: [[TMP53:%.*]] = phi <2 x i32> [ [[TMP32]], [[PRED_UREM_CONTINUE]] ], [ [[TMP43]], [[PRED_UREM_IF26]] ] ; CHECK-NEXT: [[TMP54:%.*]] = phi <2 x i32> [ [[TMP33]], [[PRED_UREM_CONTINUE]] ], [ [[TMP47]], [[PRED_UREM_IF26]] ] ; CHECK-NEXT: [[TMP55:%.*]] = phi <2 x i32> [ [[TMP34]], [[PRED_UREM_CONTINUE]] ], [ [[TMP51]], [[PRED_UREM_IF26]] ] -; CHECK-NEXT: [[TMP56:%.*]] = xor <2 x i1> [[TMP13]], +; CHECK-NEXT: [[TMP56:%.*]] = xor <2 x i1> [[TMP13]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP9]], <2 x i32> [[TMP52]] ; CHECK-NEXT: [[PREDPHI28:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP10]], <2 x i32> [[TMP53]] ; CHECK-NEXT: [[PREDPHI29:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP11]], <2 x i32> [[TMP54]] ; CHECK-NEXT: [[PREDPHI30:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP12]], <2 x i32> [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope !5, !noalias !8 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope [[META12]], !noalias [[META13]] ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope [[META14]], !noalias [[META15]] ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope !15 +; CHECK-NEXT: store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope [[META15]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP61]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -207,14 +207,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope !15 -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope !15 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META15]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META15]] ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP10]], 23 ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP11]], 23 ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP12]], 24 @@ -259,14 +259,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; UNROLL-NO-VF-NEXT: [[PREDPHI29:%.*]] = select i1 [[TMP45]], i32 [[TMP23]], i32 [[TMP42]] ; UNROLL-NO-VF-NEXT: [[PREDPHI30:%.*]] = select i1 [[TMP44]], i32 [[TMP24]], i32 [[TMP35]] ; UNROLL-NO-VF-NEXT: [[PREDPHI31:%.*]] = select i1 [[TMP45]], i32 [[TMP25]], i32 [[TMP43]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope !15 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope [[META15]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -371,12 +371,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !23 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META23]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; CHECK: pred.sdiv.if: @@ -401,10 +401,10 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: ; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP15]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP22]], [[PRED_SDIV_IF3]] ] -; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope !20, !noalias !23 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] @@ -448,12 +448,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !23 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !23 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META23]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META23]] ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 @@ -476,8 +476,8 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = xor i1 [[TMP13]], true ; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[TMP17]] ; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[TMP21]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META20]], !noalias [[META23]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] @@ -552,15 +552,15 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !32 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], , !dbg [[DBG34:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META32]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) +; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true), !dbg [[DBG34:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], splat (i32 200) +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP9]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] @@ -586,11 +586,11 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: ; CHECK-NEXT: [[TMP28:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF3]] ] -; CHECK-NEXT: [[TMP29:%.*]] = xor <2 x i1> [[TMP7]], , !dbg [[DBG35]] -; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true), !dbg [[DBG35]] +; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP27]], <2 x i1> zeroinitializer, !dbg [[DBG35]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP5]], <2 x i32> [[TMP28]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope !29, !noalias !32 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP32]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] @@ -636,22 +636,22 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !32 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META32]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META32]] ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP5]], 100 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp sge i32 [[TMP4]], 200 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp sge i32 [[TMP5]], 200 -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = select i1 [[TMP16]], i1 [[TMP14]], i1 false, !dbg [[DBG35:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i1 [[TMP15]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp sge i32 [[TMP4]], 200, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp sge i32 [[TMP5]], 200, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = select i1 [[TMP14]], i1 [[TMP16]], i1 false, !dbg [[DBG35:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = select i1 [[TMP15]], i1 [[TMP17]], i1 false, !dbg [[DBG35]] ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP12]] ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = or i1 [[TMP19]], [[TMP13]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP20]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] @@ -668,14 +668,14 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: ; UNROLL-NO-VF-NEXT: [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ] -; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP16]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP17]], i1 [[TMP31]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP28:%.*]] = xor i1 [[TMP16]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP17]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP14]], i1 [[TMP28]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] ; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP10]], i32 [[TMP25]] ; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP11]], i32 [[TMP29]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META29]], !noalias [[META32]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll index 3496ca343023e45..9882013e726ef38 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll @@ -17,10 +17,10 @@ for.cond.cleanup: ; preds = %if.end ; CHECK-LABEL: test ; CHECK: vector.body: -; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, +; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, splat (i32 11) +; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, splat (i32 13) +; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, splat (i32 17) +; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, splat (i32 19) ; CHECK-NOT: %{{.*}} = sdiv <2 x i32> %{{.*}}, ; CHECK-NOT: %{{.*}} = udiv <2 x i32> %{{.*}}, ; CHECK-NOT: %{{.*}} = srem <2 x i32> %{{.*}}, diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 255768d8794c36d..01eb535e6586cff 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -99,7 +99,7 @@ define i32 @test(ptr nocapture %f) #0 { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; VEC-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], +; VEC-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC: pred.store.if: @@ -350,8 +350,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: -; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], -; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1) +; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 330cdeaeb7c2758..220e62d9e37300a 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1216,13 +1216,13 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]] @@ -1342,12 +1342,12 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]] ; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] @@ -1604,7 +1604,7 @@ define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2) ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1686,7 +1686,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2) ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1764,7 +1764,7 @@ define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2) ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 069cb1f7cad7b90..ecb00d472448876 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -208,7 +208,7 @@ for.end: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <8 x i64> [[VEC_IND]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label %middle.block, label [[VECTOR_BODY]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 8bdba25b1b761eb..d6eb4264c0a0d7a 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -31,7 +31,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -72,7 +72,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IND: middle.block: @@ -110,13 +110,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL: middle.block: @@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 @@ -163,7 +163,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -201,13 +201,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE: middle.block: @@ -891,7 +891,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], +; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -951,7 +951,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; IND-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 ; IND-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0 ; IND-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1 -; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], +; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) ; IND-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]] ; IND-NEXT: [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -1026,8 +1026,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP21]], align 4 ; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 ; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1 -; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], -; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], +; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; UNROLL-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]] ; UNROLL-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]] ; UNROLL-NEXT: [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]] @@ -1107,8 +1107,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load float, ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]] @@ -1174,8 +1174,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; INTERLEAVE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> ; INTERLEAVE-NEXT: [[WIDE_VEC6:%.*]] = load <32 x float>, ptr [[TMP9]], align 4 ; INTERLEAVE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC6]], <32 x float> poison, <4 x i32> -; INTERLEAVE-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], -; INTERLEAVE-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], +; INTERLEAVE-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; INTERLEAVE-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; INTERLEAVE-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[STRIDED_VEC]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[STRIDED_VEC3]] ; INTERLEAVE-NEXT: [[TMP14]] = fadd fast <4 x float> [[TMP12]], [[STRIDED_VEC5]] @@ -1574,7 +1574,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 @@ -1586,7 +1586,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1637,7 +1637,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1 -; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], +; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0 ; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1 @@ -1649,7 +1649,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; IND-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IND: middle.block: @@ -1702,9 +1702,9 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 3 -; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], -; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], +; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8) ; UNROLL-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1 @@ -1726,7 +1726,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: store i32 [[TMP24]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL: middle.block: @@ -1777,13 +1777,13 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 @@ -1805,7 +1805,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1989,7 +1989,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP15]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2239,8 +2239,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ [[TMP20]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_UDIV_IF7]] ] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], -; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] ; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] @@ -2465,7 +2465,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -2513,7 +2513,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1 ; IND-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IND: middle.block: @@ -2551,7 +2551,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3 @@ -2572,7 +2572,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP9]], i64 1 ; UNROLL-NEXT: store i16 [[TMP17]], ptr [[TMP13]], align 2 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL: middle.block: @@ -2611,7 +2611,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 @@ -2633,7 +2633,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], ptr [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2671,7 +2671,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3 @@ -2708,7 +2708,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[TMP13]], i64 3 ; INTERLEAVE-NEXT: store i16 [[TMP29]], ptr [[TMP21]], align 2 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; INTERLEAVE: middle.block: @@ -2765,7 +2765,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -2833,9 +2833,9 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -2902,7 +2902,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -2970,9 +2970,9 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -3043,7 +3043,7 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3111,9 +3111,9 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3194,7 +3194,7 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -3323,14 +3323,14 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3465,7 +3465,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: middle.block: @@ -3530,7 +3530,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IND: middle.block: @@ -3590,7 +3590,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3599,7 +3599,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL: middle.block: @@ -3663,7 +3663,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i8 [[OFFSET_IDX]], 0 @@ -3673,7 +3673,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3732,7 +3732,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3741,7 +3741,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; INTERLEAVE: middle.block: @@ -3845,7 +3845,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: @@ -3913,7 +3913,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IND: middle.block: @@ -3976,7 +3976,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3985,7 +3985,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 16) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL: middle.block: @@ -4052,7 +4052,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0 @@ -4062,7 +4062,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 8) ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4124,7 +4124,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 16) ; INTERLEAVE-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -4133,7 +4133,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 32) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4211,7 +4211,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: @@ -4244,7 +4244,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IND: middle.block: @@ -4274,14 +4274,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL: middle.block: @@ -4312,7 +4312,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -4320,7 +4320,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4349,14 +4349,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4416,7 +4416,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: middle.block: @@ -4454,7 +4454,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: @@ -4489,7 +4489,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 ; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] @@ -4497,7 +4497,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL: middle.block: @@ -4537,7 +4537,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP6]] @@ -4546,7 +4546,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4579,7 +4579,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] @@ -4587,7 +4587,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4650,7 +4650,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: @@ -4689,7 +4689,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: @@ -4724,7 +4724,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] @@ -4732,7 +4732,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL: middle.block: @@ -4768,7 +4768,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP1]] @@ -4777,7 +4777,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4811,7 +4811,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] @@ -4819,7 +4819,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4873,7 +4873,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; CHECK: middle.block: @@ -4911,7 +4911,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; IND: middle.block: @@ -4946,13 +4946,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL: middle.block: @@ -4988,7 +4988,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -4996,7 +4996,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5031,13 +5031,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 16) ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5098,7 +5098,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; CHECK: middle.block: @@ -5185,7 +5185,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[TMP13:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; IND-NEXT: [[TMP14]] = or <2 x i32> [[VEC_PHI]], [[TMP13]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IND: middle.block: @@ -5216,7 +5216,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer -; UNROLL-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], +; UNROLL-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], splat (i16 -2) ; UNROLL-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; UNROLL-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL: pred.urem.if: @@ -5260,7 +5260,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[TMP26]] = or <2 x i32> [[VEC_PHI]], [[TMP24]] ; UNROLL-NEXT: [[TMP27]] = or <2 x i32> [[VEC_PHI1]], [[TMP25]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL: middle.block: @@ -5290,13 +5290,13 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.urem.if: @@ -5340,7 +5340,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5384,7 +5384,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE15]] ] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ne <4 x i16> [[VEC_IND]], zeroinitializer -; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], +; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], splat (i16 -4) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; INTERLEAVE: pred.urem.if: @@ -5464,7 +5464,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP44]] ; INTERLEAVE-NEXT: [[TMP47]] = or <4 x i32> [[VEC_PHI1]], [[TMP45]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 8) ; INTERLEAVE-NEXT: [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5541,19 +5541,19 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; CHECK: middle.block: @@ -5605,19 +5605,19 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] -; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], +; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) ; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], +; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) ; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], -; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; IND: middle.block: @@ -5666,14 +5666,14 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> ; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]] ; UNROLL-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], -; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], +; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42) +; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42) ; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]] ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]] ; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] @@ -5682,9 +5682,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> ; UNROLL-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] ; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] -; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NEXT: [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], +; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NEXT: [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], splat (i32 4) ; UNROLL-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] ; UNROLL-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] ; UNROLL-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> @@ -5692,9 +5692,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], -; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4) ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL: middle.block: @@ -5744,15 +5744,15 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], -; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42) +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] @@ -5761,8 +5761,8 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], splat (i32 1) ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> @@ -5770,9 +5770,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5824,14 +5824,14 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD7]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND3]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], +; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], splat (i32 42) +; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], splat (i32 42) ; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND3]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD7]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]] @@ -5840,9 +5840,9 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64> ; INTERLEAVE-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]] -; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], -; INTERLEAVE-NEXT: [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], -; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], +; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1) +; INTERLEAVE-NEXT: [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1) +; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]] ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]] ; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64> @@ -5850,9 +5850,9 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]] ; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], -; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5944,7 +5944,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; CHECK: middle.block: @@ -5992,7 +5992,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]] ; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; IND: middle.block: @@ -6013,7 +6013,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4 @@ -6030,7 +6030,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP6]], align 4 ; UNROLL-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL: middle.block: @@ -6051,7 +6051,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> @@ -6069,7 +6069,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -6106,7 +6106,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4 @@ -6123,7 +6123,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP6]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP9]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; INTERLEAVE: middle.block: @@ -6435,7 +6435,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[DOTSPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll index 36f8dec3b637441..a57b3fad0440ca6 100644 --- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll +++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll @@ -13,7 +13,7 @@ define void @array_at_plus_one(i32 %n) { ; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds [1024 x i32], ptr @array, i64 0, i64 [[T2]] ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 ; CHECK-NEXT: store <4 x i32> [[VEC_IV_TRUNC]], ptr [[GEP0]] -; CHECK: [[VEC_IV_TRUNC_NEXT]] = add <4 x i32> [[VEC_IV_TRUNC]], +; CHECK: [[VEC_IV_TRUNC_NEXT]] = add <4 x i32> [[VEC_IV_TRUNC]], splat (i32 4) ; CHECK: ret void ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 553fc374e0fdf20..1f5d80a670a2d80 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -14,7 +14,7 @@ define i32 @one_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -72,7 +72,7 @@ define i32 @two_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -140,12 +140,12 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 10), <4 x i32> [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -210,7 +210,7 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll index 2b5e06c8c948be3..a7f3057935b6dee 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll @@ -58,7 +58,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], splat (i32 1) ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP25]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 54c08b47598e0fa..dc4fb0f15c64473 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -112,10 +112,10 @@ define void @test_struct_array_load3_store3() { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], splat (i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], splat (i32 3) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <12 x i32> @@ -267,11 +267,11 @@ define void @test_struct_store4(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[B:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <16 x i32> @@ -362,7 +362,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -421,7 +421,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP2]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4 @@ -497,7 +497,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]] ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 @@ -612,7 +612,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3 ; CHECK-NEXT: store i64 [[TMP28]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -746,7 +746,7 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -1172,7 +1172,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0 @@ -1196,7 +1196,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1366,8 +1366,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[TMP4]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP4]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP4]], 7 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] @@ -1402,7 +1402,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 63381454cc59003..1473d292d06ac9f 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -390,14 +390,14 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 4, !alias.scope [[META23:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], +; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) -; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -415,7 +415,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: for.inc8.loopexit.loopexit: ; CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT]] ; CHECK: for.inc8.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 7c610a568eafa31..10b6d1f7653da4a 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -203,12 +203,12 @@ define ptr @both(i32 %k) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END1]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END2]], %[[MIDDLE_BLOCK]] ], [ undef, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[INC_LAG1:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[TMP:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INC_LAG1:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[TMP:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[INC_LAG2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_LAG1]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP]] = getelementptr inbounds i32, ptr [[INC_LAG1]], i64 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[INC_PHI]], 1 @@ -308,21 +308,21 @@ define void @PR30742() { ; CHECK-NEXT: [[N_VEC7:%.*]] = sub i32 [[TMP4]], [[N_MOD_VF6]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[N_VEC7]], -8 ; CHECK-NEXT: [[IND_END8:%.*]] = add i32 [[TMP04]], [[TMP5]] -; CHECK-NEXT: br label %[[VECTOR_BODY9:.*]] -; CHECK: [[VECTOR_BODY9]]: -; CHECK-NEXT: [[INDEX10:%.*]] = phi i32 [ 0, %[[VECTOR_PH5]] ], [ [[INDEX_NEXT11:%.*]], %[[VECTOR_BODY9]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY10:.*]] +; CHECK: [[VECTOR_BODY10]]: +; CHECK-NEXT: [[INDEX10:%.*]] = phi i32 [ 0, %[[VECTOR_PH5]] ], [ [[INDEX_NEXT11:%.*]], %[[VECTOR_BODY10]] ] ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i32 [[INDEX10]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT11]], [[N_VEC7]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK2:.*]], label %[[VECTOR_BODY9]], {{!llvm.loop ![0-9]+}} +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK2:.*]], label %[[VECTOR_BODY10]], {{!llvm.loop ![0-9]+}} ; CHECK: [[MIDDLE_BLOCK2]]: ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC7]] ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END8]], -8 ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[BB3:.*]], label %[[SCALAR_PH3]] ; CHECK: [[SCALAR_PH3]]: -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i32 [ [[IND_END8]], %[[MIDDLE_BLOCK2]] ], [ [[TMP04]], %[[BB1]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END8]], %[[MIDDLE_BLOCK2]] ], [ [[TMP04]], %[[BB1]] ] ; CHECK-NEXT: br label %[[BB2:.*]] ; CHECK: [[BB2]]: -; CHECK-NEXT: [[TMP05:%.*]] = phi i32 [ [[BC_RESUME_VAL13]], %[[SCALAR_PH3]] ], [ [[TMP06:%.*]], %[[BB2]] ] +; CHECK-NEXT: [[TMP05:%.*]] = phi i32 [ [[BC_RESUME_VAL9]], %[[SCALAR_PH3]] ], [ [[TMP06:%.*]], %[[BB2]] ] ; CHECK-NEXT: [[TMP06]] = add i32 [[TMP05]], -8 ; CHECK-NEXT: [[TMP07:%.*]] = icmp sgt i32 [[TMP06]], 0 ; CHECK-NEXT: br i1 [[TMP07]], label %[[BB2]], label %[[BB3]], {{!llvm.loop ![0-9]+}} @@ -411,7 +411,7 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -463,7 +463,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index 327ffaad63a6546..0e7bb763237017d 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -21,14 +21,14 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> [[WIDE_LOAD2]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 @@ -106,7 +106,7 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -228,7 +228,7 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 80d705cbf9b9f82..b59cb66b11acf57 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -77,7 +77,7 @@ define void @bottom_tested(ptr %p, i32 %n) { ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.continue2: ; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TAILFOLD: middle.block: @@ -199,7 +199,7 @@ define i32 @early_exit_with_live_out(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP2]], align 4 +; CHECK-NEXT: store <2 x i32> splat (i32 10), ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -847,8 +847,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) { ; CHECK-NEXT: store i16 0, ptr [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] -; CHECK-NEXT: i32 2097, label [[IF_END]] +; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] +; CHECK-NEXT: i32 2097, label [[IF_END]] ; CHECK-NEXT: ] ; CHECK: if.end: ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] @@ -864,8 +864,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) { ; TAILFOLD-NEXT: store i16 0, ptr [[B]], align 4 ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] -; TAILFOLD-NEXT: i32 2097, label [[IF_END]] +; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] +; TAILFOLD-NEXT: i32 2097, label [[IF_END]] ; TAILFOLD-NEXT: ] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] @@ -902,8 +902,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) { ; CHECK-NEXT: store i16 0, ptr [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] -; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] +; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] +; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] ; CHECK-NEXT: ] ; CHECK: if.end: ; CHECK-NEXT: ret i32 0 @@ -920,8 +920,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) { ; TAILFOLD-NEXT: store i16 0, ptr [[B]], align 4 ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] -; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] +; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] +; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] ; TAILFOLD-NEXT: ] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: ret i32 0 @@ -1090,7 +1090,7 @@ define void @scalar_predication(ptr %addr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index de298d20fc382c2..0bffac24227571c 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -21,7 +21,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index 0515e6f07bf015a..c3d8a75acaaaf7c 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -30,7 +30,7 @@ define void @maxvf3() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 14) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -47,7 +47,7 @@ define void @maxvf3() { ; CHECK-NEXT: store i8 69, ptr [[TMP6]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> , [[VEC_IND]] +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: @@ -65,7 +65,7 @@ define void @maxvf3() { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 80348f9e46130f1..ca0edb3e1a46d67 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -26,7 +26,7 @@ define i32 @main() #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [40000 x i8], ptr addrspace(1) @Y, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[TMP0]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [40000 x i8], ptr @X, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index d462d3aa650d28d..7a3f9df6ec28212 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -45,7 +45,7 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP19]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 65), ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index ad0af978f07dd1c..c45d05e21a13805 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,7 +23,7 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -35,21 +35,21 @@ define i32 @test1() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -96,7 +96,7 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -108,21 +108,21 @@ define i32 @test2() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -183,25 +183,25 @@ define i32 @test3(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP4]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -258,7 +258,7 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -270,21 +270,21 @@ define i32 @test4(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -520,7 +520,7 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -532,22 +532,22 @@ define i8 @outside_user_non_phi() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -651,14 +651,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -685,12 +685,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 @@ -740,15 +740,15 @@ define i32 @non_uniform_live_out() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 26a2fb3806d3e7f..305906d9d1ef1d2 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -189,7 +189,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], +; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP16]], ptr [[TMP18]], align 4 @@ -223,7 +223,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], +; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP28]], ptr [[TMP30]], align 4 @@ -334,7 +334,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -355,7 +355,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP6]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -393,7 +393,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -414,7 +414,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> , ptr [[TMP6]], align 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> splat (i8 1), ptr [[TMP6]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -685,7 +685,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -714,7 +714,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -756,7 +756,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: middle.block: @@ -785,7 +785,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 27e5889356b092b..0f5dcca81d3718e 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -232,7 +232,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024) ; CHECK-NEXT: [[TMP0:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -251,7 +251,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll index b6f2ae3c14893a5..1cf410c359f06d2 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -28,21 +28,21 @@ define void @test(ptr %src, i64 %n) { ; CHECK: loop.32: ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, [[LOOP_2_HEADER1]] ], [ [[TMP2:%.*]], [[LOOP_32]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2000 x i32], ptr [[SRC:%.*]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 10) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_2_LATCH4]], label [[LOOP_32]] ; CHECK: loop.2.latch4: -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -73,7 +73,7 @@ define void @test(ptr %src, i64 %n) { ; CHECK: loop.1.latch: ; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index 5df4fce2190a3a6..ae8f0f61231cc55 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -41,19 +41,19 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[A]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: br label [[INNERMOST_LOOP1:%.*]] ; CHECK: innermost.loop1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -187,16 +187,16 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[INVARIANT_GEP]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 40d0a51f9d5e875..80e7de71870b7c1 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -23,7 +23,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -31,15 +31,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll index 92a4ea2bbda70ed..29e633316b1e459 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll @@ -23,11 +23,11 @@ ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @A, i64 0, <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]] ; CHECK: [[InnerForPh]]: -; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: br label %[[InnerForBody:.*]] @@ -35,22 +35,22 @@ ; CHECK: %[[InnerInd:.*]] = phi <4 x i64> [ zeroinitializer, %[[InnerForPh]] ], [ %[[InnerIndNext:.*]], %[[InnerForBody]] ] ; CHECK: %[[AccumPhi:.*]] = phi <4 x i32> [ %[[WideAVal]], %[[InnerForPh]] ], [ %[[AccumPhiNext:.*]], %[[InnerForBody]] ] ; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, <4 x i64> %[[InnerInd]] -; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK: %[[Add1:.*]] = add nsw <4 x i32> %[[WideBVal]], %[[VecIndTr]] ; CHECK: %[[AccumPhiNext]] = add nsw <4 x i32> %[[Add1]], %[[AccumPhi]] -; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], +; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], splat (i64 1) ; CHECK: %[[InnerVecCond:.*]] = icmp eq <4 x i64> %[[InnerIndNext]], {{.*}} ; CHECK: %[[InnerCond:.+]] = extractelement <4 x i1> %[[InnerVecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[InnerCrit:.*]], label %[[InnerForBody]] ; CHECK: [[InnerCrit]]: ; CHECK: %[[StorePhi:.*]] = phi <4 x i32> [ %[[AccumPhiNext]], %[[InnerForBody]] ] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: br label %[[ForInc]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}} ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index 4fe3f05c923330b..b857385e38535e6 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -102,11 +102,11 @@ define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 4), <2 x i32> splat (i32 5) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> splat (i32 3) +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 9) ; CHECK-NEXT: store <2 x i32> [[PREDPHI3]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 41d9c4d84202c63..531164a2c5dd051 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -32,7 +32,7 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -152,7 +152,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 diff --git a/llvm/test/Transforms/LoopVectorize/pr35773.ll b/llvm/test/Transforms/LoopVectorize/pr35773.ll index c83a7f7705f08bc..ce6ddb2569d4564 100644 --- a/llvm/test/Transforms/LoopVectorize/pr35773.ll +++ b/llvm/test/Transforms/LoopVectorize/pr35773.ll @@ -20,8 +20,8 @@ define void @doit1(ptr %ptr) { ; CHECK-NEXT: store <4 x i32> [[I32_IV]], ptr [[GEP2]], align 4 ; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4 -; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], -; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], +; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], splat (i32 36) +; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 42c4373793aa622..9f8f618f0958548 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -47,7 +47,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll index d04cca0946a4f1e..fd040ec937da7ee 100644 --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -62,7 +62,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index f1075e31688cb3e..e7b87b0aea5331f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -20,7 +20,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -37,7 +37,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> , <2 x i16> [[TMP10]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> splat (i16 5786), <2 x i16> [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -64,7 +64,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: store i16 [[COND6]], ptr @v_39, align 1 ; CHECK-NEXT: [[INC7]] = add nsw i16 [[I_07]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC7]], 111 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[RV:%.*]] = load i16, ptr @v_39, align 1 ; CHECK-NEXT: ret i16 [[RV]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 008971697775e48..42bd14e4b752bc3 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -41,7 +41,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARR]], i8 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp slt <4 x i8> [[TMP11]], [[BROADCAST_SPLAT]] @@ -49,7 +49,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP15]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -66,7 +66,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[T3_I8:%.*]] = zext i1 [[T3_I]] to i8 ; CHECK-NEXT: store i8 [[T3_I8]], ptr [[PTR]], align 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[T1_0_LCSSA]], [[PTR]] -; CHECK-NEXT: br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i8 [ [[IV_NEXT]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[IV_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45525.ll b/llvm/test/Transforms/LoopVectorize/pr45525.ll index e52d98b63abfd12..25a32d9e3e32eba 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45525.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45525.ll @@ -10,7 +10,7 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) ; bb.0: br label %bb.1 diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index dc3480fbb11a88e..ee6975fa0c9e6c1 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -19,7 +19,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -53,7 +53,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -79,9 +79,9 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2: vector.body: ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] -; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], +; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13) +; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13) ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: @@ -115,7 +115,7 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2UF2: middle.block: @@ -214,7 +214,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -252,7 +252,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -279,9 +279,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2: vector.body: ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] -; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], +; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13) +; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13) ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: @@ -319,7 +319,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2UF2: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll index 5f2298514be50e6..d7f5f646f1873c6 100644 --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -22,7 +22,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], splat (i32 40) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -77,7 +77,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP29]], [[TMP30]] ; CHECK-NEXT: [[PREVSUM]] = add nsw i16 [[SUM]], [[ADD]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[IV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[PREVSUM_LCSSA:%.*]] = phi i16 [ [[PREVSUM]], [[LOOP]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[PREVSUM_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index bc7e3c6c6e25109..4f47e66816c9a28 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -8,7 +8,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0 @@ -18,19 +18,19 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[VEC_IND]] -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> , <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -58,7 +58,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index 2fb7f88e5341e5f..c60b216f86fa8d2 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -15,7 +15,7 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index e18b3d7477a16d5..4019b34217f2d57 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -24,10 +24,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] ; IC1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 -; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], +; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) +; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) ; IC1-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP7]], [[TMP4]] -; IC1-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP11]], +; IC1-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) ; IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; IC1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC1: [[PRED_STORE_IF]]: @@ -124,14 +124,14 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 -; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC2-NEXT: [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], -; IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], +; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) +; IC2-NEXT: [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 -12) +; IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) +; IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 13) ; IC2-NEXT: [[TMP21:%.*]] = or <2 x i1> [[TMP13]], [[TMP7]] ; IC2-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP14]], [[TMP8]] -; IC2-NEXT: [[TMP19:%.*]] = xor <2 x i1> [[TMP21]], -; IC2-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[TMP22]], +; IC2-NEXT: [[TMP19:%.*]] = xor <2 x i1> [[TMP21]], splat (i1 true) +; IC2-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[TMP22]], splat (i1 true) ; IC2-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; IC2-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC2: [[PRED_STORE_IF]]: @@ -340,13 +340,13 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[LOOP_HEADER_BACKEDGE]] +; IC1-NEXT: i64 120, label %[[IF_THEN1]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[LOOP_HEADER_BACKEDGE]]: +; IC1: [[IF_THEN1]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] ; IC1: [[IF_THEN:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison @@ -354,7 +354,7 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -363,13 +363,13 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[LOOP_HEADER_BACKEDGE]] +; IC2-NEXT: i64 120, label %[[IF_THEN1]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[LOOP_HEADER_BACKEDGE]]: +; IC2: [[IF_THEN1]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] ; IC2: [[IF_THEN:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison @@ -377,7 +377,7 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -417,7 +417,7 @@ define void @switch_all_to_default(ptr %start) { ; IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]] ; IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; IC1-NEXT: store <2 x i64> , ptr [[TMP2]], align 1 +; IC1-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1 ; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -453,8 +453,8 @@ define void @switch_all_to_default(ptr %start) { ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]] ; IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; IC2-NEXT: store <2 x i64> , ptr [[TMP4]], align 1 -; IC2-NEXT: store <2 x i64> , ptr [[TMP5]], align 1 +; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP4]], align 1 +; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; IC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll index 66509ffedd68ba4..2bf02b0b906f7e7 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll @@ -18,8 +18,8 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 3) ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 567eea8e2aeb82e..240421341626abf 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -15,7 +15,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 5.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -129,7 +129,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -169,7 +169,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> +; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> splat (float 0x47EFFFFFE0000000) ; CHECK-NEXT: [[TMP26:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP25]]) ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = call fast float @llvm.minnum.f32(float [[TMP26]], float [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -287,7 +287,7 @@ define i32 @conditional_and(ptr noalias %A, ptr noalias %B, i32 %cond, i64 nound ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: [[TMP27]] = and i32 [[TMP26]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -549,7 +549,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], splat (i64 3) ; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP25]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] @@ -588,7 +588,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK: pred.load.continue14: ; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> splat (i64 -1) ; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -811,7 +811,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -850,7 +850,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fadd fast <4 x float> [[TMP24]], [[VEC_PHI]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP25]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 7.000000e+00) ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] ; CHECK: pred.load.if7: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll index 1dfd8d93d2cf1ff..3b07a76440e857b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll @@ -104,7 +104,7 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) @@ -246,7 +246,7 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 89349314f27029e..6771f561913130c 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -57,7 +57,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -100,7 +100,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -166,8 +166,8 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -214,7 +214,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -257,11 +257,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) ; CHECK-NEXT: [[TMP29]] = add i32 [[TMP28]], [[TMP26]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -305,7 +305,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -361,18 +361,18 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: pred.load.continue8: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]] -; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -420,7 +420,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -484,8 +484,8 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) ; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -532,7 +532,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -588,14 +588,14 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -640,7 +640,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -696,14 +696,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -748,7 +748,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -809,7 +809,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -854,7 +854,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -915,7 +915,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -960,7 +960,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1021,7 +1021,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -1066,7 +1066,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1122,14 +1122,14 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> +; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> +; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1174,7 +1174,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1214,11 +1214,11 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647) ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -1261,7 +1261,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1305,7 +1305,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -1354,16 +1354,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -1440,8 +1440,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1488,7 +1488,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> ; CHECK-NEXT: [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1536,7 +1536,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1583,7 +1583,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 99a0b7080358843..f65f62bbf54bf2c 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -79,13 +79,13 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP104:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP110:%.*]], [[PRED_LOAD_CONTINUE36]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 257) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD_2]], splat (i64 257) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD_3]], splat (i64 257) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -258,7 +258,7 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP109:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP108]]) ; CHECK-NEXT: [[TMP110]] = add i32 [[TMP109]], [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272 ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -302,7 +302,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], splat (i32 7) ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer @@ -314,9 +314,9 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[PRED_LOAD_CONTINUE38]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[PRED_LOAD_CONTINUE38]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[PRED_LOAD_CONTINUE38]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] @@ -484,20 +484,20 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE38]] ; CHECK: pred.load.continue35: ; CHECK-NEXT: [[TMP106:%.*]] = phi <4 x i32> [ [[TMP100]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP105]], [[PRED_LOAD_IF37]] ] -; CHECK-NEXT: [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP108:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP107]]) ; CHECK-NEXT: [[TMP109]] = mul i32 [[TMP108]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP111:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP110]]) ; CHECK-NEXT: [[TMP112]] = mul i32 [[TMP111]], [[VEC_PHI4]] -; CHECK-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> +; CHECK-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP114:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP113]]) ; CHECK-NEXT: [[TMP115]] = mul i32 [[TMP114]], [[VEC_PHI5]] -; CHECK-NEXT: [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> +; CHECK-NEXT: [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP116]]) ; CHECK-NEXT: [[TMP118]] = mul i32 [[TMP117]], [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 7c5d6a1edf0b4b8..fe74a7c3a9b27cb 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -69,7 +69,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -174,7 +174,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = mul i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -231,7 +231,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -689,16 +689,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -832,7 +832,7 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -926,7 +926,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ splat (i8 -1), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 @@ -1122,9 +1122,9 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3) +; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31) ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]] @@ -1207,9 +1207,9 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3) +; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31) ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) @@ -1354,8 +1354,8 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer -; CHECK-NEXT: [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], -; CHECK-NEXT: [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> , <4 x i1> [[TMP44]] +; CHECK-NEXT: [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], splat (i1 true) +; CHECK-NEXT: [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> splat (i1 true), <4 x i1> [[TMP44]] ; CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i1> [[DOTNOT7]] to i4 ; CHECK-NEXT: [[TMP46:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP45]]) ; CHECK-NEXT: [[TMP47:%.*]] = zext nneg i4 [[TMP46]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll index 7d8613b794f48c3..1e3d6dd203f4dac 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -9,8 +9,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK: [[VEC_PHI_1:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_5:%.+]], %vector.body ] ; CHECK: [[VEC_PHI_2:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_3:%.+]], %vector.body ] ; CHECK: icmp ule <4 x i64> -; CHECK-NEXT: [[ADD_3]] = add <4 x i32> , [[VEC_PHI_2]] -; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], +; CHECK-NEXT: [[ADD_3]] = add <4 x i32> splat (i32 3), [[VEC_PHI_2]] +; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], splat (i32 5) ; CHECK: select <4 x i1> {{.*}}, <4 x i32> [[ADD_5]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> [[ADD_3]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 7cc58edbcb408de..491a9ec0f0d4da7 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -98,7 +98,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -159,7 +159,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]] ; CHECK-NEXT: [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -204,8 +204,8 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -265,7 +265,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -309,8 +309,8 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -367,10 +367,10 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -415,7 +415,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -475,7 +475,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -520,7 +520,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -580,7 +580,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -625,7 +625,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -685,7 +685,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -730,7 +730,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -790,7 +790,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -834,8 +834,8 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -878,7 +878,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -920,8 +920,8 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -964,7 +964,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 81364f1fd6aa8e6..8a8439fca439d03 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -17,7 +17,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32> @@ -93,10 +93,10 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> , [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> ; CHECK-NEXT: [[TMP6]] = zext <4 x i8> [[TMP5]] to <4 x i32> @@ -175,8 +175,8 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1> ; CHECK-NEXT: [[TMP6]] = sext <4 x i1> [[TMP5]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 0d9918b74a2ff21..9d87a4c7035e1fd 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -229,7 +229,7 @@ for.end: ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 @@ -248,7 +248,7 @@ for.end: ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -516,7 +516,7 @@ define void @test_drop_poison_generating_dead_recipe(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], splat (i64 -31364) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360 ; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index d574ec720e92a5a..bbdff969ebddde0 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -25,7 +25,7 @@ define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -96,7 +96,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 @@ -106,7 +106,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -187,7 +187,7 @@ define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -268,7 +268,7 @@ define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -416,7 +416,7 @@ define i32 @reduction_and(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] @@ -760,16 +760,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -1059,7 +1059,7 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index d983c5138164fc7..70d9290f41bbde9 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -261,11 +261,11 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]] @@ -278,7 +278,7 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -330,11 +330,11 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]] @@ -347,7 +347,7 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index e7e63e55802fe14..bb515cd583e5bc5 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -36,7 +36,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -109,7 +109,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -190,7 +190,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP10]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP16]], align 4 @@ -375,7 +375,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 9521c0933fe8764..ccf02b96abc9d27 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -32,7 +32,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg [[DBG9]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]], !dbg [[DBG9]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !dbg [[DBG9]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], , !dbg [[DBG9]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00), !dbg [[DBG9]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]], !dbg [[DBG9]] ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4, !dbg [[DBG9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG9]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 07e7b462e7bf983..da8e2add18a6740 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -1460,7 +1460,7 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll index beac46d2eb9d975..0eddc9137027208 100644 --- a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll @@ -16,7 +16,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_CALL_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], splat (i64 5) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_CALL_IF:%.*]], label [[PRED_CALL_CONTINUE:%.*]] ; CHECK: pred.call.if: @@ -62,7 +62,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) { ; CHECK-NEXT: store i64 [[ST_VALUE]], ptr [[ST_ADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 43eeefb77449002..37a998cb7e93878 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -43,7 +43,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP16]], align 8 ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP18]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 5b140d98d83c0b4..c3931930a686eab 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -36,7 +36,7 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -99,8 +99,8 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -260,7 +260,7 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -323,8 +323,8 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -495,7 +495,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] ; CHECK-VF4-IC1-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]] -; CHECK-VF4-IC1-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-VF4-IC1-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI]], [[TMP6]] ; CHECK-VF4-IC1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -613,8 +613,8 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] ; CHECK-VF4-IC2-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP8]] -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-VF4-IC2-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] ; CHECK-VF4-IC2-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 @@ -905,7 +905,7 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -971,8 +971,8 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 65590a2963bc5e6..155bb5c4ab0b119 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -6,7 +6,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1: vector.body: ; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] ; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], +; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 35) ; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-VF2IC1-NEXT: br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue ; CHECK-VF2IC1: pred.load.if: @@ -25,7 +25,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1-NEXT: br label %pred.load.continue2 ; CHECK-VF2IC1: pred.load.continue2: ; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ] -; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], +; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], splat (i32 2) ; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = or <2 x i1> [[VEC_PHI]], [[TMP16]] ; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] ; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index b3a24b472993be6..7b66440a7fdcc21 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -7,8 +7,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -20,14 +20,14 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], +; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], splat (i1 true) ; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] @@ -93,7 +93,7 @@ define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -127,8 +127,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -158,8 +158,8 @@ define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -189,8 +189,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -221,10 +221,10 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], splat (i32 3) ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index f747993c54fc999..34fc0587c5d87b8 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -11,22 +11,22 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 undef ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -74,22 +74,22 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 poison ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -137,22 +137,22 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 087a0aa429b7ed7..b7fc1e83ce23f43 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -29,8 +29,8 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], splat (i32 10) +; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> splat (i32 10) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -91,7 +91,7 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 6407583061e601e..d99dcbb087d4916 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -24,12 +24,12 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> , <2 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> splat (i16 1), <2 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI]], ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -111,14 +111,14 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> , <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -208,9 +208,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -303,14 +303,14 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]] -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> , <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -387,7 +387,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 54bbf3f22ec0111..9efc8d12f44a390 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -11,7 +11,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 200) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -49,11 +49,11 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[exit:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] @@ -64,8 +64,8 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[exit]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[exit]]: +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll index 11d0aac7429797e..a757314ec7a46ad 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll @@ -45,8 +45,8 @@ while.end: ; define void @reuse_const_btc(ptr %A) optsize { ; CHECK-LABEL: @reuse_const_btc -; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, -; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> , <4 x i32> +; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, splat (i32 13) +; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> splat (i32 12), <4 x i32> splat (i32 13) ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index e4572a2b0a065af..dd2fd0de9938efd 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -14,8 +14,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] @@ -50,7 +50,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index 1b646200e1c7c5d..ec7f036ca27d3f3 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -25,7 +25,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_value_in_header(ptr %dst, i64 % ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -91,7 +91,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_constant_in_header(ptr %dst, i6 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -158,7 +158,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -270,7 +270,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -482,7 +482,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -554,7 +554,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -626,7 +626,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_exe ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -698,7 +698,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -767,7 +767,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -835,7 +835,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_constant_in_latch(ptr %dst, i64 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP20:![0-9]+]] @@ -897,7 +897,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP4]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -960,7 +960,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1027,7 +1027,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1177,7 +1177,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 % ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1290,7 +1290,7 @@ define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_diviso ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP7]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 048d18ea8422d07..b5bef4aacff3cf2 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -17,7 +17,7 @@ define i32 @test_icmp_constant_op_zext(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i16 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 109), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index cae6ece9f9d8c9f..14608d5068ed336 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -4,24 +4,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -64,24 +64,24 @@ exit: } define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -124,24 +124,24 @@ exit: } define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_ashr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_ashr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -184,8 +184,8 @@ exit: } define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shifted_op -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shifted_op( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -199,7 +199,7 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> splat (i32 19), [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -249,8 +249,8 @@ exit: define void @test_lshr_by_18(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_18 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_18( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -264,7 +264,7 @@ define void @test_lshr_by_18(ptr %A) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 18) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -313,8 +313,8 @@ exit: } define void @test_lshr_by_4(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_4 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_4( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -328,12 +328,12 @@ define void @test_lshr_by_4(ptr %A) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], splat (i16 4) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i16> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -375,19 +375,3 @@ loop: exit: ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index c9fc8beb006d9b4..85011d5f446c1de 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -153,7 +153,7 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2 ; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 4cee3e3cb6832eb..5e8b60b910aed02 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -18,7 +18,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -78,7 +78,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -133,7 +133,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -142,12 +142,12 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -200,7 +200,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -209,7 +209,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -217,7 +217,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -273,7 +273,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -333,7 +333,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -342,7 +342,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -350,7 +350,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -403,7 +403,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -412,7 +412,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -420,7 +420,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -473,7 +473,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -482,7 +482,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -490,7 +490,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -546,7 +546,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -608,7 +608,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -664,7 +664,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -673,12 +673,12 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -730,7 +730,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -739,12 +739,12 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -798,7 +798,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -807,7 +807,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -815,7 +815,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -872,7 +872,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -933,7 +933,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -942,7 +942,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -950,7 +950,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1004,7 +1004,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1013,7 +1013,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1021,7 +1021,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1075,7 +1075,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1084,7 +1084,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1092,7 +1092,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: @@ -1149,7 +1149,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -1203,14 +1203,14 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], splat (i16 6) ; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]] @@ -1218,8 +1218,8 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index 7f8b33e97360cc1..4200ff966893841 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -18,7 +18,7 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -78,7 +78,7 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -133,7 +133,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -142,12 +142,12 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -200,7 +200,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -209,7 +209,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -217,7 +217,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -274,7 +274,7 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -334,7 +334,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -343,7 +343,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -351,7 +351,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -404,7 +404,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -413,7 +413,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -421,7 +421,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -473,7 +473,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -482,12 +482,12 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -541,7 +541,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -550,7 +550,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -558,7 +558,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -612,7 +612,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -621,7 +621,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -629,7 +629,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -683,7 +683,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -692,7 +692,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -700,7 +700,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 098e29eb6916616..c5a8e749997548b 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -16,8 +16,8 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1 @@ -50,12 +50,12 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 5 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 6 ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 7 -; CHECK-NEXT: [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], +; CHECK-NEXT: [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], splat (i64 42) ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[TMP36]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP35]], ptr [[TMP37]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -108,9 +108,9 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], splat (i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], splat (i64 3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP3]], i32 1 @@ -143,12 +143,12 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 5 ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 6 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x i64> [[TMP34]], i64 [[TMP27]], i32 7 -; CHECK-NEXT: [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], +; CHECK-NEXT: [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], splat (i64 42) ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[TMP37]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP36]], ptr [[TMP38]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -201,7 +201,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP1]], i32 1 @@ -234,12 +234,12 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 5 ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 6 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 7 -; CHECK-NEXT: [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], +; CHECK-NEXT: [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], splat (i64 42) ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[TMP35]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP34]], ptr [[TMP36]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -296,7 +296,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index f79772915b02489..17b94495b517cea 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -19,7 +19,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -58,7 +58,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -118,7 +118,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -154,7 +154,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1 @@ -171,12 +171,12 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3 -; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], +; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42) ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -232,7 +232,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -272,7 +272,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0 ; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], +; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -338,7 +338,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -346,7 +346,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -399,7 +399,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -413,7 +413,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -469,7 +469,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -515,7 +515,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 -; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -590,7 +590,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -598,7 +598,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -651,7 +651,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -665,7 +665,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -718,7 +718,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -727,7 +727,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -735,7 +735,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -771,7 +771,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 @@ -788,7 +788,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -802,7 +802,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -855,7 +855,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -864,12 +864,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; VF2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -902,7 +902,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1 @@ -919,12 +919,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3 -; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], +; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42) ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -981,7 +981,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; VF2-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -1028,7 +1028,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 -; VF4-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1095,7 +1095,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1104,7 +1104,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1112,7 +1112,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1149,7 +1149,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 @@ -1166,7 +1166,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1180,7 +1180,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1234,7 +1234,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1243,7 +1243,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1251,7 +1251,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1288,7 +1288,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 @@ -1305,7 +1305,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1319,7 +1319,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 098835afa4480a1..4bc5aee381bc9a7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -15,8 +15,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -26,13 +26,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: @@ -70,8 +70,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -89,13 +89,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: @@ -163,7 +163,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8 @@ -205,8 +205,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -224,13 +224,13 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -291,8 +291,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -302,13 +302,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: @@ -346,8 +346,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -365,13 +365,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: @@ -434,8 +434,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -445,7 +445,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -453,8 +453,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -496,8 +496,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -515,7 +515,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -529,8 +529,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -593,8 +593,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -604,7 +604,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -612,8 +612,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: @@ -655,8 +655,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -674,7 +674,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -688,8 +688,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: @@ -752,8 +752,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -763,7 +763,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -771,8 +771,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -814,8 +814,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -833,7 +833,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -847,8 +847,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -911,8 +911,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -922,7 +922,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -930,8 +930,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -973,8 +973,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -992,7 +992,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1006,8 +1006,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -1070,8 +1070,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1081,7 +1081,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -1089,8 +1089,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -1132,8 +1132,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -1151,7 +1151,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1165,8 +1165,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -1229,8 +1229,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1240,7 +1240,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -1248,8 +1248,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: @@ -1291,8 +1291,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -1310,7 +1310,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1324,8 +1324,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: @@ -1387,8 +1387,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1398,13 +1398,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1443,8 +1443,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1462,13 +1462,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1530,8 +1530,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1541,13 +1541,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1586,8 +1586,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1605,13 +1605,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: @@ -1673,8 +1673,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1684,13 +1684,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: @@ -1729,8 +1729,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1748,13 +1748,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: @@ -1818,8 +1818,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -1829,7 +1829,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -1837,8 +1837,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: @@ -1881,8 +1881,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -1900,7 +1900,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1914,8 +1914,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: @@ -1979,8 +1979,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -1990,7 +1990,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -1998,8 +1998,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: @@ -2042,8 +2042,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2061,7 +2061,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2075,8 +2075,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: @@ -2140,8 +2140,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2151,7 +2151,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2159,8 +2159,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: @@ -2203,8 +2203,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2222,7 +2222,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2236,8 +2236,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: @@ -2301,8 +2301,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2312,7 +2312,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2320,8 +2320,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; VF2: middle.block: @@ -2364,8 +2364,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2383,7 +2383,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2397,8 +2397,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; VF4: middle.block: @@ -2462,8 +2462,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2473,7 +2473,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2481,8 +2481,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; VF2: middle.block: @@ -2525,8 +2525,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2544,7 +2544,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2558,8 +2558,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; VF4: middle.block: @@ -2623,8 +2623,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2634,7 +2634,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2642,8 +2642,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; VF2: middle.block: @@ -2686,8 +2686,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2705,7 +2705,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2719,8 +2719,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index d79b4a7cefc2570..d7d7d5d9c5da0e6 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -181,7 +181,7 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], ptr [[B]], ptr poison ; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index c9c1cd1c1817b84..e307d93b1896a87 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -20,7 +20,7 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll index cb1ac49986f68c7..76d3f6ca4c5adb9 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i16, ptr %src, i32 [[IDX0]] ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.+]] = load <4 x i16>, ptr [[GEP0]], align 2 -; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD]], <4 x i16> ) +; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD]], <4 x i16> splat (i16 15)) ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr %dst, i32 [[IDX0]] ; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i16, ptr [[GEP0]], i32 0 ; CHECK-NEXT: store <4 x i16> [[FSHL]], ptr [[GEP1]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 5e65832aba8cc4a..bb938045a541327 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -365,7 +365,7 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP21]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -440,7 +440,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i16> , ptr [[TMP5]], align 2 +; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP5]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -506,7 +506,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: store <4 x i16> , ptr [[TMP7]], align 2 +; CHECK-NEXT: store <4 x i16> splat (i16 -1), ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll index 1552af3140e3837..7aa7293de9bbd6f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -22,26 +22,26 @@ define void @inner_loop_reduction(ptr noalias nocapture readonly %a.in, ptr noal ; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ] ; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ , %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ] ; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, ptr %a.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, ptr %b.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: br label %[[FOR2_HEADER:.*]] ; CHECK: [[FOR2_HEADER]]: ; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[MASKED_GATHER2]], %[[REDUCTION]] -; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], +; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], splat (i32 1) +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], splat (i32 10000) ; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}} ; CHECK: [[FOR1_LATCH]]: ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll index 4241409d3935fd6..6783b9b1cba07e0 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -11,9 +11,9 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ] ; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ , %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ] ; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, ptr %a.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, ptr %b.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_SQRT:.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %[[MASKED_GATHER2]]) ; CHECK-NEXT: br label %[[FOR2_HEADER:.*]] @@ -21,17 +21,17 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[B_SQRT]], %[[REDUCTION]] -; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], +; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], splat (i32 1) +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], splat (i32 10000) ; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}} ; CHECK: [[FOR1_LATCH]]: ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll index 7b65d0257a1dc39..8fb68d7413d0e47 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll @@ -27,7 +27,7 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl ; CHECK: for2.header1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[SELECT:%.*]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -73,7 +73,7 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -120,7 +120,7 @@ define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_PHI]] to <4 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -168,7 +168,7 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP3]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP3]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll index eb87800daea99cf..c203a2a27c81320 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll @@ -152,41 +152,41 @@ define <1 x i32> @dotproduct_i32_v8_constvector(<8 x i32> %a) { ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT1:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT2:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT1]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], +; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT3]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], +; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], splat (i32 3) ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], +; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], +; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], splat (i32 5) ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5 ; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], +; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], splat (i32 6) ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], +; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], splat (i32 7) ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], +; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], splat (i32 8) ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll index 6bcc61a3c6291a6..d49745909b02631 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll @@ -21,9 +21,9 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: store <3 x double> [[TMP0]], ptr [[A_PTR]], align 8 ; RM-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[A_PTR]], i64 3 ; RM-NEXT: store <3 x double> [[TMP1]], ptr [[VEC_GEP7]], align 8 -; RM-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], -; RM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], -; RM-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], +; RM-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], splat (double 1.000000e+00) +; RM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], splat (double 1.000000e+00) +; RM-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], splat (double 1.000000e+00) ; RM-NEXT: store <2 x double> [[TMP2]], ptr [[B_PTR]], align 8 ; RM-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, ptr [[B_PTR]], i64 2 ; RM-NEXT: store <2 x double> [[TMP3]], ptr [[VEC_GEP8]], align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll index 020a72183e9ea1c..f8c4a6c6d112619 100644 --- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -325,7 +325,7 @@ define void @test8() { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[STRUCT_TEST8:%.*]], align 16 -; CHECK-NEXT: store <4 x i32> , ptr [[MEMTMP]], align 16 +; CHECK-NEXT: store <4 x i32> splat (i32 -1), ptr [[MEMTMP]], align 16 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll index 4841e2e958b281b..17592ffaf5d4329 100644 --- a/llvm/test/Transforms/NewGVN/completeness.ll +++ b/llvm/test/Transforms/NewGVN/completeness.ll @@ -119,8 +119,8 @@ define <2 x i32> @test3vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1000), [[ENTRY]] ], [ splat (i32 10), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll index 33bcab679ba91ae..4a5ce1359d25719 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -54,10 +54,10 @@ define void @loop(ptr %X, ptr %Y) { ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> , <2 x double> [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> , <2 x double> [[WIDE_LOAD8]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], splat (double 6.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], splat (double 6.000000e+00) +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD8]] ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]] @@ -152,8 +152,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index ec46ef959ce69e3..5aee938f5191f20 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -113,8 +113,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225) +; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1 @@ -189,8 +189,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 -; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], -; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], +; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225) +; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225) ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP43]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1 @@ -266,8 +266,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 ; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 -; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], -; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], +; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225) +; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225) ; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP74]]) ; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1 @@ -343,8 +343,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48 ; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], -; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], +; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225) +; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225) ; CHECK-NEXT: [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP105]]) ; CHECK-NEXT: [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll index 6e9256afc7a8df3..97d2247ab13c141 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll @@ -24,9 +24,9 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX1]], align 4 @@ -41,12 +41,12 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> ) -; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> ) +; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> splat (double -0.000000e+00)) +; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> splat (double -0.000000e+00)) ; CHECK-NEXT: [[TMP14]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI16]], [[TMP12]] ; CHECK-NEXT: [[TMP15]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI17]], [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> -; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI15]], [[TMP17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 4 @@ -210,9 +210,9 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI32:%.*]] = phi <2 x double> [ [[TMP27]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI33:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI33:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX_US1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_US1]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX_US1]], align 4 @@ -229,12 +229,12 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt <2 x double> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP10]] ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP16:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP10]], <2 x double> ) -; CHECK-NEXT: [[TMP17:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP11]], <2 x double> ) +; CHECK-NEXT: [[TMP16:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP10]], <2 x double> splat (double -0.000000e+00)) +; CHECK-NEXT: [[TMP17:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP11]], <2 x double> splat (double -0.000000e+00)) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI32]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI33]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP12]], <2 x double> [[TMP14]], <2 x double> -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP13]], <2 x double> [[TMP15]], <2 x double> +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP12]], <2 x double> [[TMP14]], <2 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP13]], <2 x double> [[TMP15]], <2 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP22]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI]], [[TMP20]] ; CHECK-NEXT: [[TMP23]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI31]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 4 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index b53d0c211919b87..eba0ed2d35fe46a 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -43,8 +43,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP12:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = mul <8 x i32> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i32> [[TMP12]], [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], -; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], splat (i32 16) +; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], splat (i32 16) ; CHECK-NEXT: [[TMP17:%.*]] = trunc nuw <8 x i32> [[TMP15]] to <8 x i16> ; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <8 x i32> [[TMP16]] to <8 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = sub <8 x i16> zeroinitializer, [[TMP17]] @@ -53,8 +53,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw <8 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP23:%.*]] = mul <8 x i32> [[TMP21]], [[TMP9]] ; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[TMP22]], [[TMP10]] -; CHECK-NEXT: [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], -; CHECK-NEXT: [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], splat (i32 16) +; CHECK-NEXT: [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP27:%.*]] = trunc nuw <8 x i32> [[TMP25]] to <8 x i16> ; CHECK-NEXT: [[TMP28:%.*]] = trunc nuw <8 x i32> [[TMP26]] to <8 x i16> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP27]], <8 x i16> [[TMP19]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll index 2121775224098ea..8d31a7d25ff3dae 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll @@ -72,7 +72,7 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> @@ -99,9 +99,9 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], -; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], -; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], +; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15) +; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537) +; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535) ; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]] ; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]] ; CHECK-NEXT: [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]]) diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll index 6e9abb3813aa1d2..595b0f76a418beb 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll @@ -39,8 +39,8 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS ; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i16>, ptr [[NEXT_GEP16]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD17]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <8 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> splat (i32 32767)) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr [[NEXT_GEP14]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index 33a0eb43b70856f..0db94bea46c4022 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -62,8 +62,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] ; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] -; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], +; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1) ; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -107,8 +107,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] ; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] -; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], +; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1) ; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] ; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll index 05e39efc7745ae9..580c13d50b1f51f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll @@ -38,10 +38,10 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], splat (i32 65793) +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], splat (i32 65793) +; CHECK-NEXT: [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], splat (i32 -16777216) +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], splat (i32 -16777216) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[POUT:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll index be7906ebc0a8a2b..a054d87997b1b5a 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll @@ -58,22 +58,22 @@ define dso_local void @test(ptr %start, ptr %end) #0 { ; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], -; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], -; AVX2-NEXT: [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], -; AVX2-NEXT: [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 -12) +; AVX2-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 -12) +; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 -12) +; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 -12) +; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 13) +; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 13) +; AVX2-NEXT: [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 13) +; AVX2-NEXT: [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 13) ; AVX2-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP8]], [[TMP12]] ; AVX2-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP9]], [[TMP13]] ; AVX2-NEXT: [[TMP18:%.*]] = or <8 x i1> [[TMP10]], [[TMP14]] ; AVX2-NEXT: [[TMP19:%.*]] = or <8 x i1> [[TMP11]], [[TMP15]] -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX2-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll index d9101272ba3d0fb..549e4e036a747bd 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll @@ -6,26 +6,26 @@ define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) { ; SSE-LABEL: @trunc_through_one_add( ; SSE-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 ; SSE-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16> -; SSE-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], +; SSE-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], splat (i16 1) ; SSE-NEXT: [[TMP6:%.*]] = add nuw nsw <8 x i16> [[TMP5]], [[TMP4]] -; SSE-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 ; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1 ; SSE-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16> -; SSE-NEXT: [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], +; SSE-NEXT: [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], splat (i16 1) ; SSE-NEXT: [[TMP13:%.*]] = add nuw nsw <8 x i16> [[TMP12]], [[TMP11]] -; SSE-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], +; SSE-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP9]], align 2 ; SSE-NEXT: ret void ; ; AVX-LABEL: @trunc_through_one_add( ; AVX-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 ; AVX-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i16> -; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], +; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], splat (i16 1) ; AVX-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i16> [[TMP5]], [[TMP4]] -; AVX-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], +; AVX-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], splat (i16 2) ; AVX-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 ; AVX-NEXT: ret void ; @@ -181,9 +181,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; SSE-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr [[TMP2:%.*]], align 1 ; SSE-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16> ; SSE-NEXT: [[TMP8:%.*]] = add nuw nsw <8 x i16> [[TMP7]], [[TMP5]] -; SSE-NEXT: [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], +; SSE-NEXT: [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], splat (i16 1) ; SSE-NEXT: [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP8]] -; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 ; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8 @@ -193,9 +193,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; SSE-NEXT: [[TMP17:%.*]] = load <8 x i8>, ptr [[TMP13]], align 1 ; SSE-NEXT: [[TMP18:%.*]] = zext <8 x i8> [[TMP17]] to <8 x i16> ; SSE-NEXT: [[TMP19:%.*]] = add nuw nsw <8 x i16> [[TMP18]], [[TMP16]] -; SSE-NEXT: [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], +; SSE-NEXT: [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], splat (i16 1) ; SSE-NEXT: [[TMP21:%.*]] = add nuw nsw <8 x i16> [[TMP20]], [[TMP19]] -; SSE-NEXT: [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], +; SSE-NEXT: [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP22]], ptr [[TMP14]], align 2 ; SSE-NEXT: ret void ; @@ -205,9 +205,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; AVX-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP2:%.*]], align 1 ; AVX-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16> ; AVX-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i16> [[TMP7]], [[TMP5]] -; AVX-NEXT: [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], +; AVX-NEXT: [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], splat (i16 1) ; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i16> [[TMP9]], [[TMP8]] -; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], +; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 2) ; AVX-NEXT: store <16 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll index 1c16b37144f1ebb..fb7a4a3df74ef2f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll @@ -15,7 +15,7 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize { ; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll index da2bd6f22df9f3a..719d6df77cb7d5e 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll index 039e735985f9fc5..930ce6a7b7d677f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll index 5bf7be4362a8e47..92ce8eb5b5ac23a 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -41,8 +41,8 @@ define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { ; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]] ; O23-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; O23-NEXT: store <2 x double> , ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]] -; O23-NEXT: store <2 x double> , ptr [[TMP2]], align 8, !tbaa [[TBAA8]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]] ; O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; O23-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; O23-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll index 465b18ade1a7c31..993e949d6d2a0cb 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll @@ -19,7 +19,7 @@ define void @vdiv(ptr %a, float %b) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 87d3900765490fe..ac4b56b0fd1b118 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -29,10 +29,10 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll index 841096d226f7560..428147a1383b11d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll @@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) { define void @bar(ptr noundef %0) { ; SSE-LABEL: @bar( ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], -; SSE-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], +; SSE-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -1) +; SSE-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], -; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], +; SSE-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], splat (i64 -1) +; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 -; SSE-NEXT: [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], -; SSE-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], +; SSE-NEXT: [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1) +; SSE-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48 ; SSE-NEXT: [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8 -; SSE-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], -; SSE-NEXT: [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], +; SSE-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], splat (i64 -1) +; SSE-NEXT: [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @bar( ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8 -; AVX-NEXT: [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], -; AVX-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], +; AVX-NEXT: [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], splat (i64 -1) +; AVX-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 44) ; AVX-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], +; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], splat (i64 -1) +; AVX-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], splat (i64 44) ; AVX-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index d2850f36a80dc20..035f3f145bf8d32 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -11,7 +11,7 @@ define float @test_merge_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -168,7 +168,7 @@ define float @test_separate_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -250,7 +250,7 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[DOTNOT]], [[DOTNOT6]] @@ -332,7 +332,7 @@ define float @test_merge_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -479,7 +479,7 @@ define i32 @test_separate_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> @@ -552,12 +552,12 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_separate_anyof_v4si( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[RETURN:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], splat (i32 255) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index c169bc000644ed8..254136b0b841a9c 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -280,11 +280,11 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], splat (double 0x3EB0C6F7A0B5ED8D) ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i1> [[TMP8]], [[SHIFT]] ; CHECK-NEXT: [[OR_COND:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP10]], [[SHIFT2]] ; CHECK-NEXT: [[OR_COND1_NOT:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll index c966c3e7de0a243..1afd8950b02ff1f 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll @@ -30,7 +30,7 @@ define <2 x i64> @vpload_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]], <2 x i64> poison) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -74,7 +74,7 @@ define void @vpstore_v2i64_allones_mask(<2 x i64> %val, ptr %ptr, i32 zeroext %e ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll index 92b36054356b200..6eaf98f893bfab5 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll @@ -182,7 +182,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> , [[NSPLAT]] ; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m -; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> +; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> splat (i32 1) ; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]] ; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1 ; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}} @@ -212,10 +212,10 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]]) ; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start -; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]]) ; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start -; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]]) ; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start ; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer @@ -224,13 +224,13 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]]) ; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start -; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 2147483647) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -2147483648) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start) ; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer @@ -244,52 +244,52 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] ; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m -; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> +; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> splat (float 0x7FF8000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x47EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF8000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xC7EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x47EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xC7EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float -0.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float -0.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 1.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) -; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 1.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) ; ALL-CONVERT-NEXT: ret void diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll index 0b88a65a793e92c..617d513f7126814 100644 --- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -103,8 +103,8 @@ define <2 x double> @test3_reassoc(<2 x double> %a, <2 x double> %b, <2 x double define <2 x float> @test4(<2 x float> %A) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[X:%.*]] = fadd <2 x float> [[A:%.*]], -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x float> [[A]], +; CHECK-NEXT: [[X:%.*]] = fadd <2 x float> [[A:%.*]], splat (float 1.000000e+00) +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> [[X]], [[Y]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -118,7 +118,7 @@ define <2 x float> @test4(<2 x float> %A) { define <2 x float> @test5(<2 x float> %X) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 9.400000e+01) ; CHECK-NEXT: ret <2 x float> [[FACTOR]] ; %Y = fmul fast <2 x float> %X, @@ -130,7 +130,7 @@ define <2 x float> @test5(<2 x float> %X) { define <2 x float> @test5_reassoc(<2 x float> %X) { ; CHECK-LABEL: @test5_reassoc( -; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 4.700000e+01) ; CHECK-NEXT: [[Z:%.*]] = fadd reassoc <2 x float> [[Y]], [[Y]] ; CHECK-NEXT: ret <2 x float> [[Z]] ; @@ -143,7 +143,7 @@ define <2 x float> @test5_reassoc(<2 x float> %X) { define <2 x float> @test6(<2 x float> %X) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[FACTOR]] ; %Y = fadd fast <2 x float> %X ,%X @@ -168,7 +168,7 @@ define <2 x float> @test6_reassoc(<2 x float> %X) { define <2 x double> @test7(<2 x double> %W) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], +; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], splat (double 1.770000e+02) ; CHECK-NEXT: ret <2 x double> [[REASS_MUL]] ; %X = fmul fast <2 x double> %W, @@ -181,8 +181,8 @@ define <2 x double> @test7(<2 x double> %W) { define <2 x double> @test7_reassoc(<2 x double> %W) { ; CHECK-LABEL: @test7_reassoc( -; CHECK-NEXT: [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], -; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x double> [[W]], +; CHECK-NEXT: [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], splat (double 1.270000e+02) +; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x double> [[W]], splat (double 5.000000e+01) ; CHECK-NEXT: [[Z:%.*]] = fadd reassoc <2 x double> [[Y]], [[X]] ; CHECK-NEXT: ret <2 x double> [[Z]] ; @@ -196,7 +196,7 @@ define <2 x double> @test7_reassoc(<2 x double> %W) { define <2 x float> @test8(<2 x float> %arg) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], splat (float 1.440000e+02) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp1 = fmul fast <2 x float> , %arg @@ -208,8 +208,8 @@ define <2 x float> @test8(<2 x float> %arg) { define <2 x float> @test8_reassoc(<2 x float> %arg) { ; CHECK-LABEL: @test8_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], splat (float 1.200000e+01) +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], splat (float 1.200000e+01) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp1 = fmul reassoc <2 x float> , %arg @@ -222,7 +222,7 @@ define <2 x float> @test8_reassoc(<2 x float> %arg) { define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x double> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = fadd fast <2 x double> %a, @@ -235,7 +235,7 @@ define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_unary_fneg( ; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x double> [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = fadd fast <2 x double> %a, @@ -249,7 +249,7 @@ define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc <2 x double> zeroinitializer, [[A]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]] @@ -264,7 +264,7 @@ define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_reassoc_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fneg reassoc <2 x double> [[A]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]] @@ -281,7 +281,7 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer ; CHECK-NEXT: ret <2 x float> [[TMP1]] @@ -295,7 +295,7 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_unary_fneg( -; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], +; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x float> [[F]] ; @@ -310,7 +310,7 @@ define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_reassoc( -; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], +; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[C:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[D]] ; CHECK-NEXT: [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]] ; CHECK-NEXT: [[F:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[E]] @@ -325,7 +325,7 @@ define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> % define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_reassoc_unary_fneg( -; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], +; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[C:%.*]] = fneg reassoc <2 x float> [[D]] ; CHECK-NEXT: [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]] ; CHECK-NEXT: [[F:%.*]] = fneg reassoc <2 x float> [[E]] @@ -343,7 +343,7 @@ define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x double> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], +; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], splat (double 2.000000e+00) ; CHECK-NEXT: ret <2 x double> [[REASS_MUL]] ; %1 = fmul fast <2 x double> %x, %y @@ -372,7 +372,7 @@ define <2 x double> @test11_reassoc(<2 x double> %x, <2 x double> %y) { define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[MUL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[MUL]], splat (i64 5) ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; %mul = mul <2 x i64> %c, %b @@ -384,7 +384,7 @@ define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) { define <4 x float> @test13(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], splat (float 5.000000e+00) ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <4 x float> [[A:%.*]], [[MUL]] ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; diff --git a/llvm/test/Transforms/Reassociate/negation.ll b/llvm/test/Transforms/Reassociate/negation.ll index 14ae86fb94aaba0..81e888a9476888c 100644 --- a/llvm/test/Transforms/Reassociate/negation.ll +++ b/llvm/test/Transforms/Reassociate/negation.ll @@ -33,7 +33,7 @@ define i32 @test2(i32 %a, i32 %b, i32 %z) { define <2 x i32> @negate_vec_poisons(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z) { ; CHECK-LABEL: @negate_vec_poisons( -; CHECK-NEXT: [[E:%.*]] = mul <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[E:%.*]] = mul <2 x i32> [[A:%.*]], splat (i32 40) ; CHECK-NEXT: [[F:%.*]] = mul <2 x i32> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x i32> [[F]] ; @@ -51,7 +51,7 @@ define <2 x i32> @PR57683(<2 x i32> %x) { ; CHECK-NEXT: [[PARTIAL_NEG:%.*]] = sub <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[PARTIAL_NEG]], <2 x i32> [[X]], <2 x i32> ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X]] -; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[X_NEG]], +; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[X_NEG]], splat (i32 1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[SUB]], [[SHUF]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -67,7 +67,7 @@ define <2 x float> @PR57683_FP(<2 x float> %x) { ; CHECK-NEXT: [[PARTIAL_NEG:%.*]] = fsub reassoc nsz <2 x float> , [[X:%.*]] ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[PARTIAL_NEG]], <2 x float> [[X]], <2 x i32> ; CHECK-NEXT: [[X_NEG:%.*]] = fneg reassoc nsz <2 x float> [[X]] -; CHECK-NEXT: [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], +; CHECK-NEXT: [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], splat (float 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz <2 x float> [[SUB]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[R]] ; diff --git a/llvm/test/Transforms/Reassociate/xor_reassoc.ll b/llvm/test/Transforms/Reassociate/xor_reassoc.ll index 97efb8f4e59415f..729535a6fa0966d 100644 --- a/llvm/test/Transforms/Reassociate/xor_reassoc.ll +++ b/llvm/test/Transforms/Reassociate/xor_reassoc.ll @@ -25,8 +25,8 @@ define i32 @xor1(i32 %x) { ; define <2 x i32> @xor1_vec(<2 x i32> %x) { ; CHECK-LABEL: @xor1_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], splat (i32 435) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %or = or <2 x i32> %x, @@ -54,7 +54,7 @@ define i32 @xor2(i32 %x, i32 %y) { ; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y define <2 x i32> @xor2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor2_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435) ; CHECK-NEXT: [[XOR2:%.*]] = xor <2 x i32> [[AND_RA]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[XOR2]] ; @@ -85,8 +85,8 @@ define i32 @xor3(i32 %x, i32 %y) { ; c3 = ~c1 ^ c2 define <2 x i32> @xor3_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor3_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -436) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND_RA]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; @@ -114,8 +114,8 @@ define i32 @xor4(i32 %x, i32 %y) { ; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2) define <2 x i32> @xor4_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor4_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -124) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 435) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; @@ -149,7 +149,7 @@ define i32 @xor_special1(i32 %x, i32 %y) { ; (x | c1) ^ (x & ~c1) = c1 define <2 x i32> @xor_special1_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor_special1_vec( -; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123) ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; %or = or <2 x i32> %x, @@ -178,7 +178,7 @@ define i32 @xor_special2(i32 %x, i32 %y) { ; (x | c1) ^ (x & c1) = x ^ c1 define <2 x i32> @xor_special2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor_special2_vec( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 123) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll index 28600b221989259..caf4d5274bed962 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll @@ -7,16 +7,16 @@ define void @test_vector_clobber(ptr addrspace(1) %ptr) gc "statepoint-example" ; CHECK-NEXT: entry: ; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 2, i32 0, i32 62, i32 0, i32 13, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, ptr addrspace(1) [[PTR:%.*]], i32 0, ptr addrspace(1) [[PTR]], i32 7, ptr null), "gc-live"(ptr addrspace(1) [[PTR]]) ] ; CHECK-NEXT: [[PTR_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 0, i32 0) -; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i64 0, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i64 0, !is_base_value [[META0:![0-9]+]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x ptr addrspace(1)> poison, ptr addrspace(1) [[PTR_RELOCATED]], i64 0 -; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer, !is_base_value [[META0]] ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, <8 x ptr addrspace(1)> [[DOTSPLAT]], <8 x i64> ; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @bar, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 1, i32 0, i32 112, i32 0, i32 13, i32 0, i32 7, ptr null, i32 7, ptr null, i32 3, i32 poison, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, ptr addrspace(1) [[PTR_RELOCATED]], i32 0, ptr addrspace(1) [[PTR_RELOCATED]], i32 7, ptr null), "gc-live"(<8 x ptr addrspace(1)> [[GEP]], ptr addrspace(1) [[PTR_RELOCATED]], <8 x ptr addrspace(1)> [[DOTSPLAT_BASE]]) ] ; CHECK-NEXT: [[GEP_RELOCATED:%.*]] = call coldcc <8 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v8p1(token [[STATEPOINT_TOKEN1]], i32 2, i32 0) ; CHECK-NEXT: [[PTR_RELOCATED2:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN1]], i32 1, i32 1) ; CHECK-NEXT: [[DOTSPLAT_BASE_RELOCATED:%.*]] = call coldcc <8 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v8p1(token [[STATEPOINT_TOKEN1]], i32 2, i32 2) -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p1(<8 x ptr addrspace(1)> [[GEP_RELOCATED]], i32 4, <8 x i1> , <8 x float> poison) +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p1(<8 x ptr addrspace(1)> [[GEP_RELOCATED]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison) ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll index 1f2fbb6f53cdd8d..7cae2d93e03152e 100644 --- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll @@ -55,8 +55,8 @@ entry: define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) { ; CHECK-LABEL: @range_from_lshr_vec_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], -; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], +; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], splat (i8 2) ; CHECK-NEXT: ret <4 x i8> [[ADD_1]] ; entry: @@ -196,7 +196,7 @@ entry: define internal <4 x i8> @test_propagate_argument(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @test_propagate_argument( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[A:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[ADD]] ; entry: @@ -206,7 +206,7 @@ entry: define <4 x i8> @test_propagate_caller(<4 x i8> %a) { ; CHECK-LABEL: @test_propagate_caller( -; CHECK-NEXT: [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> ) +; CHECK-NEXT: [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> splat (i8 3)) ; CHECK-NEXT: ret <4 x i8> [[RES_1]] ; %add = add <4 x i8> , diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll index 5edb31738e685bf..27ada6339ea06c6 100644 --- a/llvm/test/Transforms/SCCP/intrinsics.ll +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -127,7 +127,7 @@ define <4 x i32> @pr63380(<4 x i32> %input) { ; CHECK-LABEL: @pr63380( ; CHECK-NEXT: [[CTLZ_1:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[INPUT:%.*]], i1 false) ; CHECK-NEXT: [[CTLZ_2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[CTLZ_1]], i1 true) -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 27) ; %ctlz.1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %input, i1 false) %ctlz.2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %ctlz.1, i1 true) diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index d9dc0459b5ced61..8c15b0820de255f 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -310,7 +310,7 @@ define i16 @vector_binop_and_cast() { ; CHECK-LABEL: define i16 @vector_binop_and_cast() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 -; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] +; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> splat (i16 2), [[VECINIT7]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[REM]] to i128 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i16 ; CHECK-NEXT: ret i16 [[TMP1]] diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll index 05819c32d522dbd..e08acd20cc2ace7 100644 --- a/llvm/test/Transforms/SCCP/overdefined-ext.ll +++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll @@ -26,7 +26,7 @@ define i1 @zext_icmp(i1 %t0) { define <2 x i1> @zext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[T2]] ; %t1 = zext <2 x i1> %t0 to <2 x i32> @@ -37,7 +37,7 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) { define <2 x i1> @zext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector2( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = zext <2 x i1> %t0 to <2 x i32> @@ -74,7 +74,7 @@ define i1 @sext_icmp(i1 %t0) { define <2 x i1> @sext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector( ; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[T2]] ; %t1 = sext <2 x i1> %t0 to <2 x i32> @@ -85,7 +85,7 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) { define <2 x i1> @sext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector2( ; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = sext <2 x i1> %t0 to <2 x i32> diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll index 92d84f71bd9d46d..f968ef6509748b4 100644 --- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll @@ -47,7 +47,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) { ; CHECK-LABEL: define <4 x i16> @range_from_and_nuw_vec( ; CHECK-SAME: <4 x i32> [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], +; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], splat (i32 65535) ; CHECK-NEXT: [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[TRUNC1]] ; diff --git a/llvm/test/Transforms/SCCP/vector-bitcast.ll b/llvm/test/Transforms/SCCP/vector-bitcast.ll index bed5c02d73bc515..725e3bc690ebaea 100644 --- a/llvm/test/Transforms/SCCP/vector-bitcast.ll +++ b/llvm/test/Transforms/SCCP/vector-bitcast.ll @@ -12,8 +12,8 @@ define void @foo(ptr %p) nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: -; CHECK-NEXT: [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], +; CHECK-NEXT: [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ splat (i32 939524096), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], splat (i32 -8388608) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[SUB_I_I]] to <2 x i64> ; CHECK-NEXT: store volatile <2 x i64> zeroinitializer, ptr [[P:%.*]], align 16 ; CHECK-NEXT: br label [[WHILE_BODY_I]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll index e972955e26cb475..252fe7361f07c6c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll @@ -473,7 +473,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a) define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a) ; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const( ; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 5) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; { @@ -495,7 +495,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a) define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a) ; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2( ; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 4) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll index 0c6a976b51a1d7d..2b5ee59aeb1638c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll @@ -12,8 +12,8 @@ define i16 @foo(i16 %in1, i16 %in2) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP9]], splat (i64 65535) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP12]], splat (i64 65533) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: [[ZEXT3_1:%.*]] = zext i1 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll index e60e356e5cd8195..f9953df6c1735da 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll @@ -33,8 +33,8 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_0271]], i64 4 ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds i32, ptr [[PB_ADDR_0270]], i64 4 ; CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], -; CHECK-NEXT: [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], +; CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) +; CHECK-NEXT: [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], splat (i32 -1) ; CHECK-NEXT: [[AND_I243:%.*]] = and <4 x i32> [[NOT_I242]], [[NOT_I]] ; CHECK-NEXT: [[AND_I245:%.*]] = and <4 x i32> [[TMP4]], [[NOT_I242]] ; CHECK-NEXT: [[AND_I247:%.*]] = and <4 x i32> [[TMP5]], [[NOT_I]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll index 45030a0965e0065..ec9c01c99256b04 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -20,9 +20,9 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537) +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535) ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 8987f34e561e6fb..7ae336e2ccee983 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -16,7 +16,7 @@ define void @PR28330(i32 %n) { ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; DEFAULT-NEXT: br label [[FOR_BODY]] @@ -28,7 +28,7 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; GATHER-NEXT: br label [[FOR_BODY]] @@ -40,7 +40,7 @@ define void @PR28330(i32 %n) { ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; MAX-COST-NEXT: br label [[FOR_BODY]] @@ -93,7 +93,7 @@ define void @PR32038(i32 %n) { ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; DEFAULT-NEXT: br label [[FOR_BODY]] @@ -105,7 +105,7 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; GATHER-NEXT: br label [[FOR_BODY]] @@ -117,7 +117,7 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; MAX-COST-NEXT: br label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll index 90a30808aeeba90..a5371cd051a5b94 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -20,7 +20,7 @@ define void @f_noalias(ptr noalias nocapture %dst, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], splat (i32 256) ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> [[TMP12]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5f0b16048d40c80..7622f9bc5c41d94 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1257,7 +1257,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]] @@ -1275,9 +1275,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], -; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], -; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], +; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15) +; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537) +; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535) ; CHECK-NEXT: [[TMP63:%.*]] = add <16 x i32> [[TMP62]], [[TMP59]] ; CHECK-NEXT: [[TMP64:%.*]] = xor <16 x i32> [[TMP63]], [[TMP62]] ; CHECK-NEXT: [[TMP65:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP64]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll index 8f6d5d8f2d7ec82..9f5744b17cb79e7 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll @@ -325,7 +325,7 @@ define void @no_version(ptr nocapture %dst, ptr nocapture readonly %src) { ; CHECK-LABEL: @no_version( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], splat (i32 16) ; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -1242,13 +1242,13 @@ define void @crash_no_tracked_instructions(ptr %arg, ptr %arg.2, ptr %arg.3, i1 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], ; CHECK-NEXT: store float [[T26]], ptr [[T25]], align 4 ; CHECK-NEXT: [[T27:%.*]] = load float, ptr [[ARG_2:%.*]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], splat (float 2.000000e+01) ; CHECK-NEXT: br label [[BB30]] ; CHECK: bb30: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x float> [ [[TMP4]], [[BB22]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[BB36:%.*]] ; CHECK: bb36: -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], splat (float 3.000000e+00) ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[ARG_3]], align 4 ; CHECK-NEXT: br label [[BB41:%.*]] ; CHECK: bb41: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll index 0c3c695a308cd4b..476068dcd049694 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll @@ -9,7 +9,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 1bd63b79b0f5ca1..f79db7d7ad0cbdb 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535) ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index f99f6ecd333823d..1330e5557e559be 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535) ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll index 0eda2cbc862ff04..9910090d43eae99 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll @@ -13,7 +13,7 @@ define dso_local void @l() local_unnamed_addr { ; CHECK: bb3: ; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32 ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], splat (i16 8) ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb11: ; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll index 7b27489782fc469..6e596fccd18f390 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll @@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10) ; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1 ; NON-POW2-NEXT: ret void ; @@ -17,12 +17,12 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10) ; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8 ; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4 -; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10) ; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12 ; POW2-ONLY-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll index c18811a35c1eeb3..feb4ad865f31477 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll @@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10) ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 @@ -160,7 +160,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] -; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], +; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9) ; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -177,7 +177,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] -; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 @@ -221,7 +221,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -232,7 +232,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll index c9b2e0ffc15f3d7..2d94babb568742e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll @@ -568,8 +568,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) { ; NON-POW2-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[TMP1:%.*]] = fsub <3 x float> [[IN]], [[IN]] -; NON-POW2-NEXT: [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> , <3 x float> ) -; NON-POW2-NEXT: [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], +; NON-POW2-NEXT: [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> splat (float 2.000000e+00), <3 x float> splat (float 3.000000e+00)) +; NON-POW2-NEXT: [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], splat (float 3.000000e+00) ; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> ; NON-POW2-NEXT: store <3 x float> [[TMP4]], ptr [[A]], align 4 ; NON-POW2-NEXT: ret void @@ -584,8 +584,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) { ; POW2-ONLY-NEXT: [[MUL6_I_I_I_I:%.*]] = fmul float [[TMP1]], 3.000000e+00 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[IN]], <3 x float> poison, <2 x i32> ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], [[TMP2]] -; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> , <2 x float> ) -; POW2-ONLY-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], +; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 3.000000e+00)) +; POW2-ONLY-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], splat (float 3.000000e+00) ; POW2-ONLY-NEXT: store <2 x float> [[TMP5]], ptr [[A]], align 4 ; POW2-ONLY-NEXT: store float [[MUL6_I_I_I_I]], ptr [[ARRAYIDX42_I]], align 4 ; POW2-ONLY-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll index ddeaff9327e3d2e..77371c2ac7b1cdd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll @@ -9,8 +9,8 @@ define void @select_umin_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -68,8 +68,8 @@ define void @select_umin_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -104,10 +104,10 @@ define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_ule_ugt_mix_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -144,8 +144,8 @@ define void @select_umin_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -169,8 +169,8 @@ define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_ule_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -228,8 +228,8 @@ define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_ule_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -266,8 +266,8 @@ define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_ule_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -290,8 +290,8 @@ define void @select_smin_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -349,8 +349,8 @@ define void @select_smin_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -387,8 +387,8 @@ define void @select_smin_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -411,8 +411,8 @@ define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_sle_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -470,8 +470,8 @@ define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_sle_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -508,8 +508,8 @@ define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_sle_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -531,8 +531,8 @@ define void @select_umax_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -590,8 +590,8 @@ define void @select_umax_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -628,8 +628,8 @@ define void @select_umax_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -652,8 +652,8 @@ define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_uge_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -711,8 +711,8 @@ define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_uge_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -749,8 +749,8 @@ define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_uge_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -773,8 +773,8 @@ define void @select_smax_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -832,8 +832,8 @@ define void @select_smax_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -870,8 +870,8 @@ define void @select_smax_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -895,8 +895,8 @@ define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_sge_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -954,8 +954,8 @@ define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_sge_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -992,8 +992,8 @@ define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_sge_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 0fe4e6a5aa28b53..f9e415a3cefc13d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -107,7 +107,7 @@ define void @select_uniform_ugt_7xi8(ptr %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_7xi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP0]], <4 x i8> [[TMP3]] @@ -181,7 +181,7 @@ define void @select_uniform_ugt_8xi8(ptr %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_8xi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[TMP0]], <8 x i8> [[TMP3]] @@ -258,7 +258,7 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11 ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1) ; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12) ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 @@ -371,7 +371,7 @@ define void @select_uniform_ugt_4xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ugt_4xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], splat (i16 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP0]], <4 x i16> [[TMP3]] @@ -409,7 +409,7 @@ define void @select_uniform_ult_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ult_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], splat (i16 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP0]], <8 x i16> [[TMP3]] @@ -470,7 +470,7 @@ define void @select_uniform_eq_2xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_2xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], splat (i32 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] @@ -496,7 +496,7 @@ define void @select_uniform_eq_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], splat (i32 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP0]], <4 x i32> [[TMP3]] @@ -533,7 +533,7 @@ define void @select_uniform_ne_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_uniform_ne_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], splat (i64 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP0]], <2 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll index a122d84629c1a2c..b8bf38af3668ddf 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll @@ -14,10 +14,10 @@ define void @PR50256(ptr %a, ptr %b, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], splat (i16 8) ; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_8]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[TMP7]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], splat (i16 8) ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[B]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[ARRAYIDX3_8]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll index 5132e491811691e..13773bf901b9bf2 100644 --- a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -11,8 +11,8 @@ define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %a ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], splat (half 0xH5380) +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], splat (half 0xH57F0) ; CHECK-NEXT: store <2 x half> [[TMP3]], ptr [[TMP16]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 8c6b92b65ae050b..912d60d0cc3867c 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -46,7 +46,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] -; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> @@ -59,7 +59,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] -; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], +; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) ; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] ; CHECK-NEXT: [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] ; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] @@ -77,13 +77,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> , i32 2) +; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = sub <2 x i32> [[TMP60]], [[TMP76]] -; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], +; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], splat (i32 16) ; CHECK-NEXT: [[TMP90:%.*]] = add <2 x i32> [[TMP46]], [[TMP59]] ; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 @@ -93,12 +93,12 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 ; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) ; CHECK-NEXT: [[TMP98:%.*]] = zext <2 x i8> [[TMP75]] to <2 x i32> ; CHECK-NEXT: [[TMP100:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 ; CHECK-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP100]] to <2 x i32> ; CHECK-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP98]], [[TMP103]] -; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], +; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) ; CHECK-NEXT: [[TMP74:%.*]] = add <2 x i32> [[TMP70]], [[TMP65]] ; CHECK-NEXT: [[TMP78:%.*]] = extractelement <2 x i32> [[TMP90]], i32 0 ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <2 x i32> [[TMP90]], i32 1 @@ -160,7 +160,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP89:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP108]], [[TMP89]] -; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], +; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], splat (i32 16) ; CHECK-NEXT: [[TMP112:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP120:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32> ; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP149]], <4 x i8> poison, <2 x i32> @@ -168,7 +168,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP132:%.*]] = zext <2 x i8> [[TMP131]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = sub <2 x i32> [[TMP128]], [[TMP132]] -; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], +; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], splat (i32 16) ; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 ; CHECK-NEXT: [[TMP117:%.*]] = sub <2 x i32> [[TMP97]], [[TMP120]] ; CHECK-NEXT: [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP117]] @@ -204,7 +204,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP118]] to <2 x i32> ; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP115]], [[TMP134]] -; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], +; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], splat (i32 16) ; CHECK-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP158]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP191:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; CHECK-NEXT: [[TMP160:%.*]] = shufflevector <4 x i8> [[TMP121]], <4 x i8> poison, <2 x i32> @@ -212,7 +212,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP171:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP172:%.*]] = zext <2 x i8> [[TMP171]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP161]], [[TMP172]] -; CHECK-NEXT: [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], +; CHECK-NEXT: [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) ; CHECK-NEXT: [[TMP137:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV33_1]], i32 1 ; CHECK-NEXT: [[TMP173:%.*]] = sub <2 x i32> [[TMP137]], [[TMP191]] ; CHECK-NEXT: [[TMP174:%.*]] = add <2 x i32> [[TMP136]], [[TMP173]] @@ -235,9 +235,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15 ; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP194:%.*]] = lshr <2 x i32> [[TMP110]], -; CHECK-NEXT: [[TMP154:%.*]] = and <2 x i32> [[TMP194]], -; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP154]], +; CHECK-NEXT: [[TMP154:%.*]] = lshr <2 x i32> [[TMP110]], splat (i32 15) +; CHECK-NEXT: [[TMP184:%.*]] = and <2 x i32> [[TMP154]], splat (i32 65537) +; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP184]], splat (i32 65535) ; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD55]] ; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD55]], [[ADD48_1]] ; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] @@ -312,9 +312,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] ; CHECK-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_3]] ; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]] -; CHECK-NEXT: [[TMP184:%.*]] = lshr <2 x i32> [[TMP102]], -; CHECK-NEXT: [[TMP185:%.*]] = and <2 x i32> [[TMP184]], -; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP185]], +; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) +; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) +; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) ; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP183]] ; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] ; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 @@ -373,7 +373,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> ; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] -; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], +; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) ; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] ; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> @@ -386,7 +386,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> ; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] -; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], +; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) ; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] ; THR15-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]] ; THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] @@ -404,13 +404,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> ; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] -; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> , i32 2) +; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> ; THR15-NEXT: [[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> ; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> ; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], +; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) ; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] ; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 @@ -420,12 +420,12 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 ; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> ; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> , <2 x i8> poison) +; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) ; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> ; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 ; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> ; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] -; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], +; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) ; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] ; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 ; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 @@ -487,7 +487,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> ; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] -; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], +; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) ; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> ; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> @@ -495,7 +495,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> ; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] -; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], +; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) ; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 ; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]] ; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] @@ -531,7 +531,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> ; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] -; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], +; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) ; THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> ; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> @@ -539,7 +539,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] -; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], +; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) ; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 ; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]] ; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] @@ -562,9 +562,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15 ; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], -; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], -; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], +; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) +; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) +; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) ; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] ; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] ; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] @@ -639,9 +639,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] ; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] ; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] -; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], -; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], -; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], +; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) +; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) +; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) ; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] ; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]] ; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll index 85e0b183956d863..da8cd38e3108d74 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll @@ -62,7 +62,7 @@ define void @fp_sub(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -70,7 +70,7 @@ define void @fp_sub(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; @@ -156,7 +156,7 @@ define void @fp_div(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -164,7 +164,7 @@ define void @fp_div(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; @@ -254,7 +254,7 @@ define void @fp_min(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00)) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -262,7 +262,7 @@ define void @fp_min(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> ) +; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00)) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll index f07b6bbe8d6621d..9269a710c61d3be 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -9,8 +9,8 @@ define void @test(ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> , <8 x i8> poison) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) ; CHECK-NEXT: br label %[[FOR_COND:.*]] ; CHECK: [[FOR_COND]]: ; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll index 3872cb0c3a340c5..fc06866c1e95534 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll @@ -12,7 +12,7 @@ define void @f(ptr %dest, i64 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -46,7 +46,7 @@ define void @g(ptr %dest, i64 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -77,7 +77,7 @@ define void @h(ptr %dest, i32 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll index 747a1ae2b5ce4c6..d9a9e9bf1a7736d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll @@ -7,14 +7,14 @@ define void @vec_add(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_add( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -36,14 +36,14 @@ define void @vec_sub(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_sub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_sub( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -65,14 +65,14 @@ define void @vec_rsub(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_rsub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]] ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_rsub( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]] ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -94,14 +94,14 @@ define void @vec_mul(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_mul( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_mul( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -123,14 +123,14 @@ define void @vec_sdiv(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_sdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_sdiv( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll index 052b461b8d75012..a6c5273dc4cd0db 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll @@ -32,14 +32,14 @@ define void @vec_add(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_add( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -179,12 +179,12 @@ entry: define void @splat_store_i32_one(ptr %dest) { ; CHECK-LABEL: @splat_store_i32_one( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <4 x i32> , ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @splat_store_i32_one( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: store <4 x i32> , ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll index d6dc3bcc3354c11..e50e4863e91af3f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll @@ -10,9 +10,9 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[TMP5]] to i16 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll index 3982a4523af40ef..7d7e09758b92218 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll @@ -47,7 +47,7 @@ define void @g(ptr %p, i1 %c) { ; CHECK-NEXT: br label [[D]] ; CHECK: d: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ , [[A]] ], [ , [[B]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP2]], ptr [[P_0]], align 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll index d5e1a110c6277c9..a985207fb97f44a 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll @@ -5,7 +5,7 @@ define void @test() { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr null, align 2 ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i16> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll index 7771e8369b61988..c89a82dd359637b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll @@ -6,7 +6,7 @@ define i32 @test(ptr %0, ptr %1) { ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> splat (i8 1)) ; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]] ; CHECK-NEXT: ret i32 [[OP_RDX]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 4cc9a0124337d9d..bc24a44cecbe39b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -149,7 +149,7 @@ entry: define i64 @red_strided_ld_16xi64(ptr %ptr) { ; CHECK-LABEL: @red_strided_ld_16xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> , i32 16) +; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> splat (i1 true), i32 16) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]]) ; CHECK-NEXT: ret i64 [[TMP1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll index 4788e1ef715593c..bf6e2bd91ae46f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll @@ -17,7 +17,7 @@ define void @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr null, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> , <2 x float> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> splat (i1 true), <2 x float> poison) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x float> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i1> [[TMP6]], <2 x i1> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index 0cf4da623a0fe9b..3d00ddf89aaa3b7 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -6,7 +6,7 @@ define i32 @test() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[IF_END_I87:%.*]] ; CHECK: if.end.i87: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll index 9c1da08c64b7b72..fd3d4ab80b29cca 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll @@ -14,12 +14,12 @@ define void @test(ptr %a, i64 %0) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> , i32 2) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: br label %[[BB]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll index 98333c7b420cf04..8aa75294c4d5194 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll @@ -9,7 +9,7 @@ define <4 x i32> @test(<2 x i64> %v, ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[P]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x ptr> [[TMP0]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, <2 x ptr> [[TMP1]], <2 x i64> [[TMP4]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP2]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP2]], i32 2, <2 x i1> splat (i1 true), <2 x i16> poison) ; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP6]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll index 9cfc5f86cb014af..063bc0458cadc69 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll @@ -8,8 +8,8 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], splat (i16 1) +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 65536), <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: ret i32 [[TMP7]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll index 54b7b1192ec9730..8db3a8b6ff21986 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll @@ -9,11 +9,11 @@ define <4 x i32> @test(i16 %0, i16 %1) { ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[CONV15_I:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> , i32 [[CONV15_I]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP5]], <4 x i32> [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], splat (i32 65535) ; CHECK-NEXT: ret <4 x i32> [[TMP9]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll index 44d320c75fedd46..f1619c9dd034d2b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll @@ -7,8 +7,8 @@ define void @test(ptr %p, ptr noalias %s) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -81,8 +81,8 @@ define void @test1(ptr %p, ptr noalias %s, i32 %stride) { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -162,9 +162,9 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) { ; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -242,8 +242,8 @@ define void @test3(ptr %p, ptr noalias %s) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll index 93f5b5e46d2c334..655db54af98ac59 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll @@ -14,7 +14,7 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> , i64 [[ADD_I_I62_US]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[CLASS_A:%.*]], <2 x ptr> zeroinitializer, <2 x i64> [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> , <2 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> splat (i1 true), <2 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 ; CHECK-NEXT: [[CMP_I_I_I_I67_US:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll index b47168c76fa2bcd..bce0884e929258a 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll @@ -15,7 +15,7 @@ define i16 @test() { ; CHECK-NEXT: [[PEDGE_061_I:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ null, [[ENTRY]] ] ; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr [[S]], ptr [[PEDGE_061_I]], i64 -1 ; CHECK-NEXT: [[PPREV_0_I]] = getelementptr [[S]], ptr [[PPREV_062_I]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i64(ptr align 2 [[PPREV_0_I]], i64 4, <2 x i1> , i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i64(ptr align 2 [[PPREV_0_I]], i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1 ; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP3]], [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll index 8ab57cc73e646f5..d94cd59a489b34f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll @@ -5,7 +5,7 @@ define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: define i32 @sum_of_abs ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false) ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll index d4dca87bfd8e400..7a0ab8002b8c042 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll @@ -9,7 +9,7 @@ define void @store_reverse(ptr %p3) { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P3]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX2]], i64 -8, <4 x i1> , i32 4) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX2]], i64 -8, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll index 3ca5733a6bad02a..89539739ebcb9d3 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll @@ -8,7 +8,7 @@ define i32 @test(i64 %v1, i64 %v2) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[V2]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP2]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll index 0c0c723e6699610..744e1d86b0a9ce2 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll @@ -7,9 +7,9 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll index 4f855e20fac4dbf..21312c28891d529 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll @@ -11,18 +11,18 @@ define void @test(ptr %p, i16 %load794) { ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], splat (i32 3329) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP22:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], splat (i64 5039) +; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP22]], splat (i64 24) ; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], splat (i32 62207) ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16> -; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], splat (i16 -3329) ; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP15]], <2 x i16> zeroinitializer) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll index 3a1a8fb4b2e32f0..073c8847b15a1c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll @@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10) ; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1 ; NON-POW2-NEXT: ret void ; @@ -17,17 +17,17 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10) ; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8 ; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4 -; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10) ; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12 ; POW2-ONLY-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12 ; POW2-ONLY-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_SRC_12]], align 4 -; POW2-ONLY-NEXT: [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], +; POW2-ONLY-NEXT: [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], splat (i8 10) ; POW2-ONLY-NEXT: store <2 x i8> [[TMP5]], ptr [[DST_12]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14 ; POW2-ONLY-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index 308d0e27f1ea891..7ab5e4d6cb787e5 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10) ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 @@ -56,7 +56,7 @@ define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> , [[TMP0]] +; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> splat (i32 10), [[TMP0]] ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -170,7 +170,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] -; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], +; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9) ; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -187,7 +187,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] -; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 @@ -231,7 +231,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -242,7 +242,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 @@ -467,7 +467,7 @@ define i32 @reduce_add_after_mul(ptr %src) { ; NON-POW2-LABEL: @reduce_add_after_mul( ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], +; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], splat (i32 10) ; NON-POW2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]]) ; NON-POW2-NEXT: ret i32 [[TMP3]] ; @@ -798,7 +798,7 @@ define float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) { ; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1 ; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2 -; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], +; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01) ; NON-POW2-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]]) ; NON-POW2-NEXT: ret float [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll index a7bb272b44dc4b9..ca93cbd698adacc 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll @@ -6,7 +6,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) { ; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> @@ -14,7 +14,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]] ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: store i32 [[TMP14]], ptr [[A]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 9cdb7c228e38278..9e6270376ddd43b 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -16,7 +16,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP2]], <2 x i32> [[TMP1]], i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], splat (i32 6) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[ARRAYIDX372]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index d8845f20ecd72e0..5ebbb76f3d6c334 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -16,8 +16,8 @@ define void @test() #0 { ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> , [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> splat (i64 1), [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], splat (i64 32) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 8e878f3f8b80fa3..8839fc22817888b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 0b17e19e4fadd88..dfa918a6ea4532b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll index 2c16612722c6a76..6187dd9ea910d87 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll @@ -15,7 +15,7 @@ define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca ; CHECK-LABEL: @and4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -45,7 +45,7 @@ define void @and8(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca ; CHECK-LABEL: @and8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -95,7 +95,7 @@ define void @and16(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc ; CHECK-LABEL: @and16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -185,19 +185,19 @@ define void @and32(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc ; SSE-LABEL: @and32( ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; SSE-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], +; SSE-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64) ; SSE-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; SSE-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 16 ; SSE-NEXT: [[ARRAYIDX3_16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 16 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX_16]], align 1 -; SSE-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 -64) ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr [[ARRAYIDX3_16]], align 1 ; SSE-NEXT: ret void ; ; AVX-LABEL: @and32( ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; AVX-NEXT: [[TMP1:%.*]] = and <32 x i8> [[TMP0]], +; AVX-NEXT: [[TMP1:%.*]] = and <32 x i8> [[TMP0]], splat (i8 -64) ; AVX-NEXT: store <32 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll index 33fd3e6dc0e09f4..25652e026a8374f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll @@ -25,46 +25,46 @@ define void @sdiv_v16i32_uniformconst() { ; SSE-LABEL: @sdiv_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @sdiv_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @sdiv_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @sdiv_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -122,46 +122,46 @@ define void @sdiv_v16i32_uniformconst() { define void @srem_v16i32_uniformconst() { ; SSE-LABEL: @srem_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @srem_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @srem_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @srem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -219,46 +219,46 @@ define void @srem_v16i32_uniformconst() { define void @udiv_v16i32_uniformconst() { ; SSE-LABEL: @udiv_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @udiv_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @udiv_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @udiv_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -316,46 +316,46 @@ define void @udiv_v16i32_uniformconst() { define void @urem_v16i32_uniformconst() { ; SSE-LABEL: @urem_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @urem_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @urem_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @urem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll index 16977c025e3eaac..45294e581e6aeac 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll @@ -854,13 +854,13 @@ define void @fshl_v2i32_uniformconst() { ; ; AVX256-LABEL: @fshl_v2i32_uniformconst( ; AVX256-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX256-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX256-NEXT: ret void ; ; AVX512-LABEL: @fshl_v2i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX512-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll index 090a9daa6a1136f..d2002b4eedaf40d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll @@ -854,13 +854,13 @@ define void @fshr_v2i32_uniformconst() { ; ; AVX256-LABEL: @fshr_v2i32_uniformconst( ; AVX256-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX256-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX256-NEXT: ret void ; ; AVX512-LABEL: @fshr_v2i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX512-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll index d388fd17925a168..f46a5d84a86cc92 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -13,7 +13,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll index 4af23a6c9651004..f1b094e9bbed4a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll @@ -80,7 +80,7 @@ define i32 @ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], +; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 ; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll index e8548e0467381ef..860d0ed29332c26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll @@ -17,7 +17,7 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[ARRAYIDX660:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7800 ; CHECK-NEXT: [[ARRAYIDX689:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7816 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> , <2 x i32> @@ -25,7 +25,7 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP13]], <2 x i32> [[TMP10]], i64 2) -; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], splat (i32 1) ; CHECK-NEXT: [[SHR685:%.*]] = lshr i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16> ; CHECK-NEXT: [[CONV686:%.*]] = trunc i32 [[SHR685]] to i16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll index 0b9a053d49204cb..926cc7b906efe10 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -14,7 +14,7 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, double %G) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll index 133e64e5bd00d86..54a81a3ce723cf1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll @@ -12,7 +12,7 @@ define void @foo(ptr %v0, ptr readonly %v1) { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[T14]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[T142]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[T222]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP5]], ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[T212]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index ae6e6723706cd9a..0b6c8f3d2562b39 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -82,7 +82,7 @@ define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) { ; CHECK-LABEL: @merge_anyof_v4f32_wrong_first( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00 @@ -109,7 +109,7 @@ define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) { ; CHECK-LABEL: @merge_anyof_v4f32_wrong_last( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00 @@ -136,7 +136,7 @@ define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) { ; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], i32 -1, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 4eb0be61b51c41c..925b348cdeec1f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -24,11 +24,11 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve ; CHECK: for.body233: ; CHECK-NEXT: br i1 undef, label [[FOR_BODY233]], label [[FOR_END271]] ; CHECK: for.end271: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ , [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ splat (float 0x47EFFFFFE0000000), [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ] ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> undef, [[TMP0]] ; CHECK-NEXT: br i1 undef, label [[IF_THEN291:%.*]], label [[RETURN]] ; CHECK: if.then291: -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 5.000000e-01) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[TMP2]] ; CHECK-NEXT: br i1 undef, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]] ; CHECK: if.else319: @@ -40,7 +40,7 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ splat (float 0x3F847AE140000000), [[IF_THEN291]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[VERTICES:%.*]], align 4 ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 849c1d7c192dce5..988e8985b9f151d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -24,20 +24,20 @@ define void @testfunc(ptr nocapture %dest, ptr nocapture readonly %src) { ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP5]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP8]], <2 x float> [[TMP5]], <2 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP7]], splat (float -1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> , <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP15]], <2 x float> splat (float -0.000000e+00), <2 x float> [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> -; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], -; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> , <2 x float> [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP14]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP17]], <2 x float> [[TMP14]], <2 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP19:%.*]] = fcmp olt <2 x float> [[TMP20]], splat (float -1.000000e+00) +; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP19]], <2 x float> splat (float -1.000000e+00), <2 x float> [[TMP20]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 2f69a01bb5e3704..95ae544e2c62f9f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 9b35fcaebadf955..1e31772b8e49ed8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll index 1c0fa1d85f9e4af..a9f92f324d6f541 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll @@ -67,7 +67,7 @@ define void @SIM4() { ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ [[TMP3]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ] ; CHECK-NEXT: br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]] ; CHECK: if.then157: -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr undef, align 4 ; CHECK-NEXT: br label [[LAND_LHS_TRUE167]] ; CHECK: land.lhs.true167: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 1f33676c19c1482..42ad20ff578c10f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -27,7 +27,7 @@ define void @main() { ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[ADD_I276_US]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index 9c8569fcbdf759d..739e3964c2685ff 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -179,11 +179,11 @@ define i32 @foo4(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], splat (double 7.900000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], splat (double 6.000000e+00) ; CHECK-NEXT: store <4 x double> [[TMP5]], ptr [[A]], align 8 ; CHECK-NEXT: ret i32 undef ; @@ -335,8 +335,8 @@ define void @cse_for_hoisted_instructions_in_preheader(ptr %dst, i32 %a, i1 %c) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> splat (i32 22), [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], splat (i32 3) ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[A]], 3 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 10 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll index d203888e0c6ee81..b42f36f105636e2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll @@ -25,12 +25,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; COUNT0-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; COUNT0-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; COUNT0-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; COUNT0-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; COUNT0-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; COUNT0-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; COUNT0-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; COUNT0-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; COUNT0-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; COUNT0-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; COUNT0-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; COUNT0-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; COUNT0-NEXT: ret void ; @@ -135,12 +135,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; COUNT-1-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; COUNT-1-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; COUNT-1-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; COUNT-1-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; COUNT-1-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; COUNT-1-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; COUNT-1-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; COUNT-1-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; COUNT-1-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; COUNT-1-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; COUNT-1-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; COUNT-1-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; COUNT-1-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll index feeb34e2f1145bd..22c1d7ce388ab5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll @@ -17,13 +17,13 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re ; SSE-NEXT: [[Q2:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 2 ; SSE-NEXT: [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q]], i64 4 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[P]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP3]], ptr [[Q]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[P2]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP7]], ptr [[Q2]], align 8 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[P4]], align 8 -; SSE-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP11]], ptr [[Q4]], align 8 ; SSE-NEXT: ret void ; @@ -31,10 +31,10 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 4 ; AVX-NEXT: [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 4 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr [[P]], align 8 -; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], +; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], splat (double 1.000000e+00) ; AVX-NEXT: store <4 x double> [[TMP3]], ptr [[Q]], align 8 ; AVX-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[P4]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; AVX-NEXT: store <2 x double> [[TMP7]], ptr [[Q4]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll index 3d3d00f4a0b3fdc..fb25ff975adc2e0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll @@ -12,8 +12,8 @@ define void @test() { ; CHECK: loop: ; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ] -; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX16:%.*]] = add i64 [[TMP9]], [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll index 597ccd712e0a691..92157b24c6bb059 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -31,9 +31,9 @@ define double @ext_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+01) +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], splat (double 4.000000e+00) +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], splat (double 4.000000e+00) ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_020]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll index 1374e9873e1c53d..02c3173adc654fd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -12,7 +12,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll b/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll index 12deea6a262ce46..f7bba85a8c48577 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll @@ -14,7 +14,7 @@ define void @vectorize_fp128(fp128 %c, fp128 %d) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x fp128> poison, fp128 [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x fp128> [[TMP0]], fp128 [[D:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], splat (fp128 0xL00000000000000007FFF000000000000) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: [[OR_COND39:%.*]] = or i1 [[TMP4]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll index 234b658032383e6..a7a92bad5e5c1fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll @@ -44,7 +44,7 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) { ; CHECK-NEXT: br label [[IF_END75]] ; CHECK: if.end75: ; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP20]], [[IF_THEN74]] ], [ [[TMP21]], [[IF_END72]] ] -; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP22]], splat (i32 1) ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i32> [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = sitofp <4 x i32> [[TMP25]] to <4 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll index 7e75970de34929c..3ac0d01cf9a2c10 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -12,7 +12,7 @@ define i1 @test(i32 %g, i16 %d) { ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], splat (i8 -3) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll index 9e43cefef2801dd..12263b065d89c96 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0 ; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> -; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> splat (i1 true), <4 x i64> poison) ; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer ; CHECK-SLP-THRESHOLD-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll index 17f5abada632d84..1514fcec02bd9c8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll @@ -14,7 +14,7 @@ define void @foo1 (ptr noalias %x, ptr noalias %y) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x ptr>, ptr [[TMP1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> splat (i64 16) ; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP2]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index e0d7c12f70c2ef2..63b41627106e5c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -18,7 +18,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 1) ; CHECK-NEXT: switch i32 [[TMP4]], label [[WHILE_BODY_BACKEDGE]] [ ; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] @@ -29,13 +29,13 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2 ; CHECK-NEXT: store i32 [[TMP8]], ptr [[INCDEC_PTR1]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2) ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: sw.bb6: ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 ; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index c9ff2d6426d2b63..2bcabfad3d09bae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -34,7 +34,7 @@ define float @baz() { ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0 ; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], +; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], splat (float 2.000000e+00) ; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index cf9f7f7376ba44d..990803870ec23ff 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -29,7 +29,7 @@ define i32 @add_red(ptr %A, i32 %n) { ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[MUL]] ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], splat (float 7.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]]) ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP3]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll index 3a35a63c159815b..f0a1836e6c273fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -320,52 +320,52 @@ define i32 @foo1() local_unnamed_addr #0 { ; SSE-LABEL: @foo1( ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @ib, align 16 -; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], +; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP1]], ptr @ia, align 16 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 4), align 16 -; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 4), align 16 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 8), align 16 -; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], +; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 8), align 16 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 12), align 16 -; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 12), align 16 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16 -; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], +; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 20), align 16 -; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 20), align 16 ; SSE-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 24), align 16 -; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], +; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 24), align 16 ; SSE-NEXT: [[TMP14:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 28), align 16 -; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], +; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP15]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 28), align 16 ; SSE-NEXT: [[TMP16:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16 -; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], +; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP17]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16 ; SSE-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 36), align 16 -; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], +; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP19]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 36), align 16 ; SSE-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 40), align 16 -; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], +; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP21]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 40), align 16 ; SSE-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 44), align 16 -; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], +; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP23]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 44), align 16 ; SSE-NEXT: [[TMP24:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16 -; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], +; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP25]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16 ; SSE-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 52), align 16 -; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], +; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP27]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 52), align 16 ; SSE-NEXT: [[TMP28:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 56), align 16 -; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], +; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP29]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 56), align 16 ; SSE-NEXT: [[TMP30:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 60), align 16 -; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], +; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP31]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 60), align 16 ; SSE-NEXT: br label [[FOR_BODY5:%.*]] ; SSE: for.cond3: @@ -390,16 +390,16 @@ define i32 @foo1() local_unnamed_addr #0 { ; AVX512-LABEL: @foo1( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr @ib, align 16 -; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], +; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP1]], ptr @ia, align 16 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16 -; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], +; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16 ; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16 -; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], +; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16 ; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16 -; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], +; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16 ; AVX512-NEXT: br label [[FOR_BODY5:%.*]] ; AVX512: for.cond3: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll index 252da8d7fa7a720..b49d99f353b8229 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -10,18 +10,18 @@ define i32 @test(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 3) ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i8> [[TMP2]] to <2 x double> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], splat (double 1.000000e+00) ; CHECK-NEXT: store <2 x double> [[TMP13]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 81845fed1134c38..cfbfd0ebc37bcab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -22,9 +22,9 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> [[TMP10]], <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15) +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537) +; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535) ; CHECK-NEXT: [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP16]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index e5d7ad138b4def2..166c819098c8c12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -24,7 +24,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: Entry: ; CHECK-NEXT: br i1 poison, label [[LOOP0:%.*]], label [[EXIT:%.*]] ; CHECK: loop0: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ] ; CHECK-NEXT: br i1 poison, label [[USERBLOCK0:%.*]], label [[BLKX:%.*]] ; CHECK: UserBlock0: ; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[LOOP0]] ], [ [[TMP5:%.*]], [[BLKX]] ] @@ -48,7 +48,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: [[TMP9]] = fadd fast <4 x float> [[TMP8]], poison ; CHECK-NEXT: br i1 poison, label [[USERBLOCK1]], label [[LOOP_INNER]] ; CHECK: Exit: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ] ; CHECK-NEXT: [[IDX0:%.*]] = add i64 0, poison ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]] ; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[GEP0]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index e1fd8a7ec88afc4..58ea4f8da01a42f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -15,20 +15,20 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[BF_LOAD_I1345:%.*]] = load i24, ptr [[TMP5]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i24> poison, i24 [[BF_LOAD_I1336]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i24> [[TMP6]], i24 [[BF_LOAD_I1345]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i24> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> , <2 x i24> [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i24> [[TMP7]], splat (i24 255) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], splat (i24 24) +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> splat (i24 23), <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> ; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] +; CHECK-NEXT: [[TMP13:%.*]] = and <2 x i32> [[TMP26]], splat (i32 254) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP13]], splat (i32 4) +; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> splat (i8 2), <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> , <2 x i8> [[TMP25]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP14]], splat (i32 32) +; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> splat (i8 31), <2 x i8> [[TMP25]] ; CHECK-NEXT: [[TMP16:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> , <2 x i8> [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq <2 x i32> [[TMP16]], splat (i32 54) +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP27]], <2 x i8> splat (i8 53), <2 x i8> [[TMP18]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i8> [[TMP21]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = zext i8 [[TMP22]] to i32 ; CHECK-NEXT: store i32 [[TMP19]], ptr [[P1]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll index a316415dcc6b527..b4cbdfa10460f6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -19,7 +19,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 ; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -35,7 +35,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; AVX-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 ; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -78,7 +78,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 ; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -94,7 +94,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 ; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll index 4b5b5da21cd21d9..35625d70a266719 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll @@ -9,8 +9,8 @@ define void @PR62969(ptr dereferenceable(16) %out, ptr dereferenceable(16) %in) ; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [2 x i64], ptr [[IN:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[OUT0:%.*]] = getelementptr inbounds [2 x i64], ptr [[OUT:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[IN0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[TMP2]], [[TMP3]] ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[OUT0]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll index 4898111960c0fdf..355f5306ee4db1c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll @@ -23,7 +23,7 @@ define void @e(ptr %c, i64 %0) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], splat (i64 16) ; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP16]]) ; CHECK-NEXT: br i1 [[TMP17]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index ec8bcc85e7db0c7..78bfb8df51aebb9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -78,9 +78,9 @@ define i32 @foo2(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4]] = fadd <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+01) +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], splat (double 4.000000e+00) +; CHECK-NEXT: [[TMP4]] = fadd <2 x double> [[TMP3]], splat (double 4.000000e+00) ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_019]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -236,7 +236,7 @@ define float @sort_phi_type(ptr nocapture readonly %A) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ splat (float 1.000000e+01), [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll index 3b42ff9d85604d8..038409c56789b40 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -15,7 +15,7 @@ define void @Rf_GReset() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @d, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP1]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr inttoptr (i64 115 to ptr), @Rf_gpptr ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll index c61d1fbc394798d..bcf36c623c27b8a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll @@ -9,7 +9,7 @@ define void @powof2div_uniform(ptr noalias nocapture %a, ptr noalias nocapture r ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll index d8e646b544179f5..5e8119f4207fb53 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll @@ -9,7 +9,7 @@ define void @powof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocapture r ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -52,7 +52,7 @@ define void @negpowof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 -2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -182,20 +182,20 @@ define void @PR51436(ptr nocapture %a) { ; SSE-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 4 ; SSE-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 6 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 8 -; SSE-NEXT: [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], +; SSE-NEXT: [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP3]], ptr [[A]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[GEP2]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], -; SSE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP7]], +; SSE-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP8]], ptr [[GEP2]], align 8 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr [[GEP4]], align 8 -; SSE-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[TMP11]], -; SSE-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP12]], +; SSE-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP11]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP13]], ptr [[GEP4]], align 8 ; SSE-NEXT: [[TMP16:%.*]] = load <2 x i64>, ptr [[GEP6]], align 8 -; SSE-NEXT: [[TMP17:%.*]] = mul <2 x i64> [[TMP16]], -; SSE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[TMP17]], +; SSE-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[TMP16]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[TMP10]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP18]], ptr [[GEP6]], align 8 ; SSE-NEXT: ret void ; @@ -203,12 +203,12 @@ define void @PR51436(ptr nocapture %a) { ; AVX-NEXT: entry: ; AVX-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 4 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[A]], align 8 -; AVX-NEXT: [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], +; AVX-NEXT: [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], splat (i64 -17592186044416) +; AVX-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 -17592186044416) ; AVX-NEXT: store <4 x i64> [[TMP3]], ptr [[A]], align 8 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr [[GEP4]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = mul <4 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP7]], +; AVX-NEXT: [[TMP4:%.*]] = mul <4 x i64> [[TMP6]], splat (i64 -17592186044416) +; AVX-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP4]], splat (i64 -17592186044416) ; AVX-NEXT: store <4 x i64> [[TMP8]], ptr [[GEP4]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll index 8f96968b978a573..dc4e28fe261784d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll @@ -11,16 +11,16 @@ define void @_Z3fooPml(ptr nocapture %a, i64 %i) { ; CHECK-LABEL: @_Z3fooPml( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 4) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[A]], align 8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @total, align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], [[TMP5]] ; CHECK-NEXT: store i64 [[ADD]], ptr @total, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = lshr <2 x i64> [[TMP5]], -; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 4) +; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[A]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @total, align 8 ; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[TMP7]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll index cb24a9cefffa2ea..9fbe0a54b0688d7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -63,16 +63,16 @@ define void @pr35497() local_unnamed_addr #0 { ; SSE-NEXT: store i64 [[ADD]], ptr undef, align 1 ; SSE-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; SSE-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2) +; SSE-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20) ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> ; SSE-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i64> [[TMP4]], zeroinitializer ; SSE-NEXT: store <2 x i64> [[TMP5]], ptr undef, align 1 ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <2 x i32> ; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1 -; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], -; SSE-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], -; SSE-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], +; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 2) +; SSE-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], splat (i64 20) +; SSE-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 6) ; SSE-NEXT: [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]] ; SSE-NEXT: store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1 ; SSE-NEXT: ret void @@ -84,15 +84,15 @@ define void @pr35497() local_unnamed_addr #0 { ; AVX-NEXT: store i64 [[ADD]], ptr undef, align 1 ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 -; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2) +; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20) ; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer ; AVX-NEXT: store <2 x i64> [[TMP4]], ptr undef, align 1 ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[ADD]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], -; AVX-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], +; AVX-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 2) +; AVX-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], splat (i64 20) +; AVX-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], splat (i64 6) ; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP9]] ; AVX-NEXT: store <2 x i64> [[TMP10]], ptr [[ARRAYIDX2_2]], align 1 ; AVX-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll index dd51105e1a6ca2d..2588012847d0960 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll @@ -58,7 +58,7 @@ define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; @@ -83,7 +83,7 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i64 1 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i64 2 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll index 264a41daef55b8a..e8703f2081c66db 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll @@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll index 6960e544b59a8a4..a77f2788200dad3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll @@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll index 29021150ccd2e39..bae127527bbbeb2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll @@ -11,8 +11,8 @@ define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15) +; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; @@ -53,8 +53,8 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> ) +; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15) +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]] ; CHECK-NEXT: ret void @@ -104,11 +104,11 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], splat (i64 15) ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255) +; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255) ; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 19cbce0767c9252..6001c659a37b753 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -321,7 +321,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -330,7 +330,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -478,7 +478,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -487,7 +487,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -691,8 +691,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -702,8 +702,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 9ac4208c6328513..b1942bfe073f2cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -321,7 +321,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -330,7 +330,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -478,7 +478,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -487,7 +487,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -691,8 +691,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -702,8 +702,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll index 92a4095c7c57a89..4f8661f6bac078a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll @@ -91,7 +91,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 ; AVX-NEXT: [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], ; AVX-NEXT: [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]] ; AVX-NEXT: [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64> -; AVX-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], +; AVX-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16) ; AVX-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]] ; AVX-NEXT: [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64> ; AVX-NEXT: [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]] @@ -123,7 +123,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 ; AVX2-NEXT: [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], ; AVX2-NEXT: [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]] ; AVX2-NEXT: [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64> -; AVX2-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], +; AVX2-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16) ; AVX2-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]] ; AVX2-NEXT: [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64> ; AVX2-NEXT: [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll index b1bc5d14550ef70..26258402b9781da 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll @@ -4,7 +4,7 @@ define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], splat (i8 64) ; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]] ; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll index 6f70aa5f4f6afb8..cb02f4d10923c83 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -11,7 +11,7 @@ target triple = "x86_64-unknown-unknown" define void @exact(ptr %x) { ; CHECK-LABEL: @exact( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -40,7 +40,7 @@ define void @exact(ptr %x) { define void @not_exact(ptr %x) { ; CHECK-LABEL: @not_exact( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -69,7 +69,7 @@ define void @not_exact(ptr %x) { define void @nsw(ptr %x) { ; CHECK-LABEL: @nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -98,7 +98,7 @@ define void @nsw(ptr %x) { define void @not_nsw(ptr %x) { ; CHECK-LABEL: @not_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -127,7 +127,7 @@ define void @not_nsw(ptr %x) { define void @nuw(ptr %x) { ; CHECK-LABEL: @nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -156,7 +156,7 @@ define void @nuw(ptr %x) { define void @not_nuw(ptr %x) { ; CHECK-LABEL: @not_nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -185,7 +185,7 @@ define void @not_nuw(ptr %x) { define void @not_nsw_but_nuw(ptr %x) { ; CHECK-LABEL: @not_nsw_but_nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -214,7 +214,7 @@ define void @not_nsw_but_nuw(ptr %x) { define void @nnan(ptr %x) { ; CHECK-LABEL: @nnan( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -243,7 +243,7 @@ define void @nnan(ptr %x) { define void @not_nnan(ptr %x) { ; CHECK-LABEL: @not_nnan( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -272,7 +272,7 @@ define void @not_nnan(ptr %x) { define void @only_fast(ptr %x) { ; CHECK-LABEL: @only_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -301,7 +301,7 @@ define void @only_fast(ptr %x) { define void @only_arcp(ptr %x) { ; CHECK-LABEL: @only_arcp( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -330,8 +330,8 @@ define void @only_arcp(ptr %x) { define void @addsub_all_nsw(ptr %x) { ; CHECK-LABEL: @addsub_all_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -361,8 +361,8 @@ define void @addsub_all_nsw(ptr %x) { define void @addsub_some_nsw(ptr %x) { ; CHECK-LABEL: @addsub_some_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -392,8 +392,8 @@ define void @addsub_some_nsw(ptr %x) { define void @addsub_no_nsw(ptr %x) { ; CHECK-LABEL: @addsub_no_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -424,7 +424,7 @@ define void @fcmp_fast(ptr %x) #1 { ; CHECK-LABEL: @fcmp_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> splat (double -0.000000e+00), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[X]], align 8 ; CHECK-NEXT: ret void @@ -482,7 +482,7 @@ define void @fcmp_no_fast(ptr %x) #1 { ; CHECK-LABEL: @fcmp_no_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[X]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll index 4c8d6b734ddc150..789ac9ef23b311a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll @@ -24,7 +24,7 @@ define i1 @test1(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[D]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 6200e3ae43fc988..0771fabef3e0288 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -274,8 +274,8 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false @@ -352,9 +352,9 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_partial( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42) ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 @@ -481,7 +481,7 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) { define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) { ; CHECK-LABEL: @logical_and_icmp_extra_args( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17) ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index ce9158d8bf2eea1..35595b79cd708b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -22,7 +22,7 @@ define i32 @reduce(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[I_015]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 [[MUL]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], splat (double 7.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP2]], [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index 5dcd5d3190ad0cd..29a8a229980e92f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -52,7 +52,7 @@ define double @foo(ptr nocapture %D) { define i1 @two_wide_fcmp_reduction(<2 x double> %a0) { ; CHECK-LABEL: @two_wide_fcmp_reduction( -; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], +; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], splat (double 1.000000e+00) ; CHECK-NEXT: [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1 ; CHECK-NEXT: [[D:%.*]] = and i1 [[B]], [[C]] @@ -67,7 +67,7 @@ define i1 @two_wide_fcmp_reduction(<2 x double> %a0) { define double @fadd_reduction(<2 x double> %a0) { ; CHECK-LABEL: @fadd_reduction( -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], splat (double 1.000000e+00) ; CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x double> [[A]], i32 1 ; CHECK-NEXT: [[D:%.*]] = fadd fast double [[B]], [[C]] @@ -102,7 +102,7 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 ; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]] @@ -144,7 +144,7 @@ define i1 @fcmp_lt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], splat (double 0x3EB0C6F7A0B5ED8D) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 ; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll index e700e7fa3a85c31..1922e935cee4be0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll @@ -14,7 +14,7 @@ define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[GEP2_0:%.*]] = getelementptr inbounds double, ptr [[ARG1:%.*]], i64 16 -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x double> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x double>, ptr [[GEP2_0]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 @@ -24,7 +24,7 @@ define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr ; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP9]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> -; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll index 6fbf01013b6d292..f0272d591f0c339 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll @@ -11,7 +11,7 @@ define void @rdx_feeds_single_insert(<2 x double> %v, ptr nocapture readonly %ar ; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP1]]) ; CHECK-NEXT: [[I:%.*]] = insertelement <2 x double> [[V:%.*]], double [[TMP2]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> -; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll index 59b0352a825929c..0807a1bd4cdea45 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll @@ -8,11 +8,11 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]]) ; CHECK-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll index cd7ad210ca56787..c7c67d31f9ded66 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll @@ -7,7 +7,7 @@ define void @test(ptr noalias %0, ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x ptr> [[TMP2]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[TMP3]], <8 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison) +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> , <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 042fb88297e2ba3..cb955ff91ed8101 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -29,7 +29,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> ; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2) -; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void ; CHECK: if.end50.i: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll index 6bbc33bac80f9d5..79ce74bd21dbcf5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -14,7 +14,7 @@ define i32 @foo(i32 %0, ptr %1, ptr %2) { ; CHECK-NEXT: br label [[T37:%.*]] ; CHECK: t37: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[TMP6]] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0 ; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll index 14033f28b931632..d1b177e2b6dea5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll @@ -5,7 +5,7 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll index 3bece6b7cf9a7ed..9d63c0f1aa59e37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll @@ -6,7 +6,7 @@ define double @test01() { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr null, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x ptr> zeroinitializer, <2 x i32> [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP2]], i32 8, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP2]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> , <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll index f10e0a840816698..26e62d36fb6a825 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll @@ -11,12 +11,12 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 { ; CHECK-LABEL: @slp_schedule_bundle( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], splat (i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @a, align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr ([1 x i32], ptr @b, i64 4, i64 0), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 31) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr getelementptr ([1 x i32], ptr @a, i64 4, i64 0), align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll index a009b1eaf65f136..242d66fda569a67 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -8,7 +8,7 @@ define void @wombat(ptr %ptr, ptr %ptr1) { ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll index 89c64d64b9c8447..1d101318bdf876b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll @@ -14,7 +14,7 @@ define i32 @rollable(ptr noalias nocapture %in, ptr noalias nocapture %out, i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], splat (i32 7) ; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP8]] = add i64 [[I_019]], 1 @@ -76,11 +76,11 @@ define i32 @unrollable(ptr %in, ptr %out, i64 %n) nounwind ssp uwtable { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], splat (i32 7) ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], splat (i32 7) ; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP12]], ; CHECK-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[BARRIER:%.*]] = call i32 @goo(i32 0) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll index a0e17446b6abb10..917baaa86460c4f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll @@ -7,7 +7,7 @@ define void @foo(ptr %ptr) { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 328 ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 334 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[GEP0]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[GEP3]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll index ba83ff096c9acbd..8fe7d15b69cb180 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll @@ -8,14 +8,14 @@ define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) { ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll index f4a0fc84cee8e6d..88a0ba59fa633c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll @@ -6,7 +6,7 @@ define void @stackrestore1(ptr %out) { ; CHECK-LABEL: @stackrestore1( ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[LOCAL_ALLOCA:%.*]] = alloca [16 x i8], align 4 -; CHECK-NEXT: store <4 x float> , ptr [[LOCAL_ALLOCA]], align 4 +; CHECK-NEXT: store <4 x float> splat (float 0x3FF3333340000000), ptr [[LOCAL_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[LOCAL_ALLOCA]], align 4 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll index e93c5244dfbe2c4..977942aa06c51ae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -8,7 +8,7 @@ define void @basecase(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @basecase( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: store ptr null, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -34,7 +34,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 @@ -126,7 +126,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] @@ -161,7 +161,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -188,7 +188,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) { define void @stacksave4(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @stacksave4( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] @@ -218,7 +218,7 @@ define void @stacksave4(ptr %a, ptr %b, ptr %c) { define void @stacksave5(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @stacksave5( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] @@ -258,7 +258,7 @@ define void @stackrestore1(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: store i8 0, ptr [[V2]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll index 378403ac304683c..02f8c552fec887c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll @@ -26,7 +26,7 @@ define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 @@ -145,7 +145,7 @@ define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: ret void ; @@ -223,7 +223,7 @@ define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll index 2f126df81f23055..83e1bef8fa066a8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll @@ -26,7 +26,7 @@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 ; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], splat (i32 1333788672) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 ; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll index 69ae26b9f258560..912b148c3100687 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll @@ -218,7 +218,7 @@ define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sar ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1 -; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], +; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], splat (double 2.000000e+00) ; ENABLED-NEXT: br label [[BB:%.*]] ; ENABLED: bb: ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll index 71b957ac7bbd895..dbd4adea4a4ece0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll @@ -15,7 +15,7 @@ define void @foo(ptr nocapture %x) #0 { ; CHECK-NEXT: [[BAD:%.*]] = fadd float [[BAD]], 0.000000e+00 ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ splat (i32 2), [[ENTRY:%.*]] ] ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[X:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll index 96d4b84e036918e..6e2a43ac5f9f107 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll @@ -166,7 +166,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -177,7 +177,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 373feb4a658f0b7..0ed12760b563fa9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index da935173b20d112..f47373747e578a0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll index e52b29a7f681c7c..d650a972ad8ca26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -47,7 +47,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index e30cb76d53d9280..869a9d1aee80e38 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -225,8 +225,8 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3 @@ -599,8 +599,8 @@ define void @addsub1f(ptr noalias %dst, ptr noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], splat (float -1.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], splat (float -1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[DST]], align 4 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll index f29d48830491943..79bef36515a88a2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll @@ -24,7 +24,7 @@ define double @root_selection(double %a, double %b, double %c, double %d) local_ ; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], splat (double 1.400000e+00) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 ; CHECK-NEXT: [[I07:%.*]] = fadd fast double undef, [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll index 51b635837d3b59b..da1b87b28a30450 100644 --- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -9,7 +9,7 @@ define i32 @test(i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], splat (i64 273837369) ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index 287b623f63690a7..ad4daeab003f5f0 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -153,7 +153,7 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[INP]], 5 ; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0 ; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1 -; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> , [[V]] +; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> splat (i8 -128), [[V]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0 ; CHECK-NEXT: ret <2 x i8> [[R]] diff --git a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll index f0d5629cfc22e9b..3cf359fd47351a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll @@ -12,8 +12,8 @@ define void @test(ptr %0, i8 %1, i1 %cmp12.i) { ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[PRE:%.*]] ; CHECK: pre: -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> ) -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> splat (i8 1)) +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i8> [[TMP6]], splat (i8 1) ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] ; CHECK-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 ; CHECK-NEXT: br label [[PRE]] diff --git a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll index 94aa8de5fdb72ce..118507c790bfc8b 100644 --- a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll @@ -14,7 +14,7 @@ define void @test(ptr %top) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8> , i8 [[TMP4]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i8> [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i8> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], splat (i8 2) ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK: for.cond.i: ; CHECK-NEXT: store <4 x i8> [[TMP8]], ptr null, align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll index 6cd44c297882746..eba97b715ea5882 100644 --- a/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll +++ b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll @@ -6,7 +6,7 @@ define i32 @test(i1 %.b, i8 %conv18, i32 %k.promoted61) { ; CHECK-SAME: i1 [[DOTB:%.*]], i8 [[CONV18:%.*]], i32 [[K_PROMOTED61:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[DOTB]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8> diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll index 5f02b0009367c71..74e52da6880400c 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll @@ -469,7 +469,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00) ; CHECK-NEXT: ret <4 x double> [[TMP6]] ; %t0 = fadd double %w , 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll index 63d55f7b1deb582..8e3a941932c9761 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll @@ -503,7 +503,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00) ; CHECK-NEXT: ret <4 x double> [[TMP6]] ; %t0 = fadd double %w , 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll index f53e22d89d5169b..50cc97a529f5fb6 100644 --- a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll @@ -23,7 +23,7 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], splat (float 1.000000e+01) ; CHECK-NEXT: [[TMP9:%.*]] = fsub <2 x float> , [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll index 408dda2211b8d17..62417268bf3d0a2 100644 --- a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll +++ b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll @@ -10,8 +10,8 @@ define i64 @src(i32 %a) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], splat (i64 4294967297) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll index 2037e0d67d2f89d..4478eab7b827ab1 100644 --- a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll @@ -10,7 +10,7 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], splat (i32 16) ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16> ; CHECK-NEXT: br label [[BB3:%.*]] @@ -22,8 +22,8 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], splat (i32 16) +; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], splat (i32 16) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> ; CHECK-NEXT: br i1 true, label [[BB3]], label [[BB2]] ; CHECK: bb3: diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 4074b8654362e00..3ef0de177b478b4 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -13,7 +13,7 @@ define i64 @test(ptr %p) { ; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) ; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42) ; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; RISCV-NEXT: ret i64 [[TMP6]] ; @@ -21,7 +21,7 @@ define i64 @test(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 42) ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) ; CHECK-NEXT: ret i64 [[TMP3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll index 1b37c0a77e94e09..c1efc85da554d45 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll @@ -27,7 +27,7 @@ define i32 @test(ptr nocapture readonly %p) { ; CHECK: for.body: ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll b/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll index 0359c8de8c96c87..48a46662fef0851 100644 --- a/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll +++ b/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll @@ -8,7 +8,7 @@ define i1 @test(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[B]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; CHECK-NEXT: ret i1 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll index 3e00550a8852150..b37c4c97a5d137d 100644 --- a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll @@ -22,12 +22,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; CHECK-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll index 965bfc7074c6384..4e31095a1d49a58 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll @@ -18,10 +18,10 @@ define void @e() { ; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], splat (i32 2) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], splat (i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], splat (i32 2) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll index 2f0bd4a8f1315c4..179c5b2f7f91cd1 100644 --- a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll @@ -9,14 +9,14 @@ define void @foo(ptr %this, ptr %p, i32 %add7) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ADD7:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: switch i32 undef, label [[SW_EPILOG:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 2, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: br label [[SW_EPILOG]] diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 6a1a23728d3c704..5326b9802ec6de2 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4 -; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4 +; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> splat (i16 123) to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true) diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll index fe14f2ff17f8387..51425c6305fe187 100644 --- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll +++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll @@ -66,7 +66,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %passthru) { ; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]] ; ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask( -; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> , <2 x i64> [[PASSTHRU:%.*]]) +; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> splat (i1 true), <2 x i64> [[PASSTHRU:%.*]]) ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]] ; ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask( diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll index 2f73227f056707c..0acc551f81f64da 100644 --- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll +++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll @@ -62,7 +62,7 @@ define void @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %data) { ; CHECK-LE-NEXT: ret void ; ; CHECK-SVE-LE-LABEL: @scalarize_v2i64_ones_mask( -; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> ) +; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-SVE-LE-NEXT: ret void ; ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask( diff --git a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll index fd50b1bcec295b8..3b6f5ccf3d3ea01 100644 --- a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll +++ b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll @@ -47,7 +47,7 @@ define void @f2() { ; CHECK: for.end: ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ , [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ] +; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ splat (i64 1), [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 6, [[FOR_END]] ], [ [[TMP0]], [[FOR_BODY2]] ] ; CHECK-NEXT: br i1 undef, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]] ; CHECK: for.cond1.for.end7_crit_edge: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll index bdfac4f2b30cfdd..c86fa349200c540 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll @@ -82,7 +82,7 @@ define i32 @basic_veccond(i32 %N, <2 x i1> %cond, <2 x i32> %select_input) { ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> +; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> splat (i32 42) ; CHECK-NEXT: [[VREDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[COND1]]) ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[VREDUCE]], [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll index 03db1bba7d22a0d..405afd5969a4130 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll @@ -125,7 +125,7 @@ define i64 @load_after_store(i32 %a, ptr %b, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64 @@ -234,7 +234,7 @@ define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) { ; CHECK-LABEL: @debug_metadata_diassign( ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> , ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2 ; CHECK-NEXT: ret i16 [[SPEC_SELECT]] ; @@ -311,12 +311,12 @@ define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ; CHECK-LABEL: @threshold_6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll index 00cee73da428c98..c80e22499775aa9 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll @@ -10,16 +10,16 @@ define i32 @align_both_equal() local_unnamed_addr { ; CHECK-LABEL: @align_both_equal( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -64,16 +64,16 @@ define i32 @align_not_equal() local_unnamed_addr { ; CHECK-LABEL: @align_not_equal( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -118,16 +118,16 @@ define i32 @align_single_zero() local_unnamed_addr { ; CHECK-LABEL: @align_single_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -172,16 +172,16 @@ define i32 @align_single_zero_second_greater_default() local_unnamed_addr { ; CHECK-LABEL: @align_single_zero_second_greater_default( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -226,16 +226,16 @@ define i32 @align_both_zero() local_unnamed_addr { ; CHECK-LABEL: @align_both_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll index 591e74c13413508..d85331f77b12f80 100644 --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll @@ -105,13 +105,13 @@ define void @stride_is_minus_2s(i32 %b, i32 %s) { define void @stride_is_minus_2s_vec(<2 x i32> %b, <2 x i32> %s) { ; CHECK-LABEL: @stride_is_minus_2s_vec( -; CHECK-NEXT: [[S6:%.*]] = mul <2 x i32> [[S:%.*]], +; CHECK-NEXT: [[S6:%.*]] = mul <2 x i32> [[S:%.*]], splat (i32 6) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i32> [[B:%.*]], [[S6]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T1]]) -; CHECK-NEXT: [[S4:%.*]] = shl <2 x i32> [[S]], +; CHECK-NEXT: [[S4:%.*]] = shl <2 x i32> [[S]], splat (i32 2) ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[B]], [[S4]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T2]]) -; CHECK-NEXT: [[S2:%.*]] = shl <2 x i32> [[S]], +; CHECK-NEXT: [[S2:%.*]] = shl <2 x i32> [[S]], splat (i32 1) ; CHECK-NEXT: [[T3:%.*]] = add <2 x i32> [[B]], [[S2]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T3]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll index 775ad4c5ecc3639..ebd2c5bd2574b5e 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll @@ -562,7 +562,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], +; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], splat (i32 16) ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> @@ -589,9 +589,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP82:%.*]] = add nsw <16 x i32> [[TMP79]], [[TMP81]] ; CHECK-NEXT: [[TMP83:%.*]] = sub nsw <16 x i32> [[TMP78]], [[TMP80]] ; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <16 x i32> [[TMP82]], <16 x i32> [[TMP83]], <16 x i32> -; CHECK-NEXT: [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], -; CHECK-NEXT: [[TMP86:%.*]] = and <16 x i32> [[TMP85]], -; CHECK-NEXT: [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], +; CHECK-NEXT: [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], splat (i32 15) +; CHECK-NEXT: [[TMP86:%.*]] = and <16 x i32> [[TMP85]], splat (i32 65537) +; CHECK-NEXT: [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], splat (i32 65535) ; CHECK-NEXT: [[TMP88:%.*]] = add <16 x i32> [[TMP87]], [[TMP84]] ; CHECK-NEXT: [[TMP89:%.*]] = xor <16 x i32> [[TMP88]], [[TMP87]] ; CHECK-NEXT: [[TMP90:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP89]]) @@ -768,7 +768,7 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> @@ -795,9 +795,9 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], -; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], -; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], +; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15) +; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537) +; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535) ; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]] ; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]] ; CHECK-NEXT: [[TMP74:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]]) diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll index 4216b0e643bb68e..3c672efbb5a07ab 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll @@ -25,7 +25,7 @@ define i32 @test_mask_or(<16 x i32> %a, ptr %b) { ; CHECK-LABEL: @test_mask_or( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], +; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32> @@ -44,14 +44,14 @@ entry: define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) { ; CHECK-LABEL: @multiuse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], -; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255) +; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i8> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 459ede173b841ae..7472e1bc52bb8c4 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -292,7 +292,7 @@ define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @constantsplat(<8 x i8> %a) { ; CHECK-LABEL: @constantsplat( -; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], splat (i8 10) ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -339,7 +339,7 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) { define <8 x half> @constantsplatf(<8 x half> %a) { ; CHECK-LABEL: @constantsplatf( -; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], splat (half 0xH4900) ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll index a22575ccb1ca214..46a622148c87185 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll @@ -28,7 +28,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) { define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_const_v4i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -179,7 +179,7 @@ define i32 @reduceshuffle_twoin_extrashuffleuse_v4i32(<4 x i32> %a, <4 x i32> %b define i32 @reduceshuffle_twoin_extraotheruse_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_extraotheruse_v4i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: call void @use(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] @@ -234,7 +234,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) { define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -247,7 +247,7 @@ define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) { define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -260,7 +260,7 @@ define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) { define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -273,7 +273,7 @@ define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -286,7 +286,7 @@ define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -299,9 +299,9 @@ define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_shr1_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> [[S]], -; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> [[S]], splat (i32 15) +; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i32> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]]) @@ -320,9 +320,9 @@ define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_shr2_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_shr2_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> , [[S]] -; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> splat (i32 15), [[S]] +; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i32> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]]) @@ -365,7 +365,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) { define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -378,7 +378,7 @@ define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) { define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -391,7 +391,7 @@ define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) { define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -404,7 +404,7 @@ define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -417,7 +417,7 @@ define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -430,9 +430,9 @@ define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_ext_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_ext_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i16> [[S]], -; CHECK-NEXT: [[A2:%.*]] = and <16 x i16> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i16> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i16> [[S]], splat (i16 7) +; CHECK-NEXT: [[A2:%.*]] = and <16 x i16> [[A1]], splat (i16 257) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i16> [[A2]], splat (i16 255) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i16> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i16> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[A5]]) diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll index 80602d12e26f8a8..1736ce46c1a7784 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll @@ -7,7 +7,7 @@ define <4 x i64> @add_v4i64_allonesmask(<4 x i64> %x) { ; CHECK-LABEL: define <4 x i64> @add_v4i64_allonesmask( ; CHECK-SAME: <4 x i64> [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[X]], <4 x i64> zeroinitializer, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <4 x i1> , i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <4 x i1> splat (i1 true), i32 0) ; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = shufflevector <4 x i64> %x, <4 x i64> zeroinitializer, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index 97608174b524d75..28d6b59e2b7f025 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -1617,7 +1617,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl) ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1626,7 +1626,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl) ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1643,7 +1643,7 @@ define <1 x i64> @add_v1i64_anymask(<1 x i64> %x, i64 %y, <1 x i1> %mask, i32 ze ; ALL-LABEL: @add_v1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1669,7 +1669,7 @@ define <4 x i64> @add_v4i64_allonesmask(<4 x i64> %x, i64 %y, i32 zeroext %evl) ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <4 x i1> [[SPLAT]], <4 x i1> poison, <4 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> , <4 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <4 x i64> [[TMP4]] ; @@ -1686,7 +1686,7 @@ define <4 x i64> @add_v4i64_anymask(<4 x i64> %x, i64 %y, <4 x i1> %mask, i32 ze ; ALL-LABEL: @add_v4i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> , <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <4 x i64> [[TMP4]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index 21adcdfd29b8260..bbdd76c58b58ec1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -514,7 +514,7 @@ define <2 x i64> @or_constant(i64 %x) { define <2 x i64> @or_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @or_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -558,7 +558,7 @@ define <2 x double> @fadd_constant(double %x) { define <2 x double> @fadd_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fadd_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -580,7 +580,7 @@ define <2 x double> @fsub_constant_op0(double %x) { define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -602,7 +602,7 @@ define <2 x double> @fsub_constant_op1(double %x) { define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -624,7 +624,7 @@ define <2 x double> @fmul_constant(double %x) { define <2 x double> @fmul_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fmul_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -646,7 +646,7 @@ define <2 x double> @fdiv_constant_op0(double %x) { define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -668,7 +668,7 @@ define <2 x double> @fdiv_constant_op1(double %x) { define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -690,7 +690,7 @@ define <2 x double> @frem_constant_op0(double %x) { define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -712,7 +712,7 @@ define <2 x double> @frem_constant_op1(double %x) { define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll index e9ab99c3e0d9c44..edd92c3f1c14c0d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll +++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll @@ -52,7 +52,7 @@ define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) { define <2 x i1> @ins0_ins0_f64(double %x, double %y) { ; CHECK-LABEL: @ins0_ins0_f64( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %i0 = insertelement <2 x double> undef, double %x, i32 0 @@ -157,7 +157,7 @@ define <2 x i1> @constant_op1_i64(i64 %x) { define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) { ; CHECK-LABEL: @constant_op1_i64_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -194,7 +194,7 @@ define <4 x i1> @constant_op0_i32(i32 %x) { define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) { ; CHECK-LABEL: @constant_op0_i32_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> , i1 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i1> [[R]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -216,7 +216,7 @@ define <2 x i1> @constant_op0_f64(double %x) { define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) { ; CHECK-LABEL: @constant_op0_f64_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -238,7 +238,7 @@ define <2 x i1> @constant_op1_f64(double %x) { define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) { ; CHECK-LABEL: @constant_op1_f32_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <4 x i1> [[R]] ; %ins = insertelement <4 x float> undef, float %x, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll index 74a58c8d3136117..4c70438b2d6e4e0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll @@ -126,7 +126,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> -; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -135,7 +135,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; AVX-LABEL: @PR35454_1( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -156,7 +156,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> -; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -165,7 +165,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; AVX-LABEL: @PR35454_2( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll index c423053a9a4839a..77b44d0e40e1446 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll @@ -220,7 +220,7 @@ define <4 x i32> @shuf_srem_v4i32_poison(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: define <4 x i32> @shuf_srem_v4i32_poison( ; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[SREM0:%.*]] = srem <4 x i32> [[A1]], [[A0]] -; CHECK-NEXT: [[SREM1:%.*]] = srem <4 x i32> , [[A0]] +; CHECK-NEXT: [[SREM1:%.*]] = srem <4 x i32> splat (i32 1), [[A0]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[SREM0]], <4 x i32> [[SREM1]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index c8c9aa161ae2899..83e088aaa4e27c4 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -185,7 +185,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> -; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -195,7 +195,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -217,7 +217,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> -; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -227,7 +227,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll index 3156d53f70f17d7..d154b15b610d041 100644 --- a/llvm/test/tools/llvm-reduce/reduce-opcodes.ll +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll @@ -179,7 +179,7 @@ define float @sqrt_ninf(float %a, float %b) { } ; CHECK-LABEL: @sqrt_vec( -; RESULT-NEXT: %op = fmul <2 x float> %a, , !dbg !7 +; RESULT-NEXT: %op = fmul <2 x float> %a, splat (float 2.000000e+00), !dbg !7 ; RESULT-NEXT: ret define <2 x float> @sqrt_vec(<2 x float> %a, <2 x float> %b) { %op = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a), !dbg !7 diff --git a/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll b/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll index 2ceebd7a5a86ac8..b547c819bf0de9f 100644 --- a/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll +++ b/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll @@ -36,13 +36,13 @@ ; ONE: %fadd3 = fadd float 1.000000e+00, 1.000000e+00 ; ONE: %fadd4 = fadd float 1.000000e+00, 1.000000e+00 ; ONE: %fadd5 = fadd float 1.000000e+00, 1.000000e+00 -; ONE: %fadd6 = fadd <2 x float> %arg2, -; ONE: %fadd7 = fadd <2 x float> , -; ONE: %fadd8 = fadd <2 x float> , zeroinitializer -; ONE: %fadd9 = fadd <2 x float> , -; ONE: %fadd10 = fadd <2 x float> , -; ONE: %fadd11 = fadd <2 x float> , -; ONE: %fadd12 = fadd <2 x float> , +; ONE: %fadd6 = fadd <2 x float> %arg2, splat (float 1.000000e+00) +; ONE: %fadd7 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd8 = fadd <2 x float> splat (float 1.000000e+00), zeroinitializer +; ONE: %fadd9 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd10 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd11 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd12 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) ; ZERO: %fadd0 = fadd float %arg0, 0.000000e+00 @@ -66,13 +66,13 @@ ; NAN: %fadd3 = fadd float 0x7FF8000000000000, 1.000000e+00 ; NAN: %fadd4 = fadd float 0x7FF8000000000000, 0x7FF8000000000000 ; NAN: %fadd5 = fadd float 0x7FF8000000000000, 0x7FF8000000000000 -; NAN: %fadd6 = fadd <2 x float> %arg2, -; NAN: %fadd7 = fadd <2 x float> , -; NAN: %fadd8 = fadd <2 x float> , zeroinitializer -; NAN: %fadd9 = fadd <2 x float> , -; NAN: %fadd10 = fadd <2 x float> , -; NAN: %fadd11 = fadd <2 x float> , -; NAN: %fadd12 = fadd <2 x float> , +; NAN: %fadd6 = fadd <2 x float> %arg2, splat (float 0x7FF8000000000000) +; NAN: %fadd7 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd8 = fadd <2 x float> splat (float 0x7FF8000000000000), zeroinitializer +; NAN: %fadd9 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 1.000000e+00) +; NAN: %fadd10 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd11 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd12 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) define void @foo(float %arg0, float %arg1, <2 x float> %arg2, <2 x float> %arg3) { bb0: diff --git a/llvm/test/tools/llvm-reduce/reduce-operands-int.ll b/llvm/test/tools/llvm-reduce/reduce-operands-int.ll index 3005058742ce31e..397a1595ca6b2cc 100644 --- a/llvm/test/tools/llvm-reduce/reduce-operands-int.ll +++ b/llvm/test/tools/llvm-reduce/reduce-operands-int.ll @@ -30,12 +30,12 @@ ; ONE: %add2 = add i32 1, 0 ; ONE: %add3 = add i32 1, 1 ; ONE: %add4 = add i32 1, 1 -; ONE: %add5 = add <2 x i32> %arg2, -; ONE: %add6 = add <2 x i32> , -; ONE: %add7 = add <2 x i32> , zeroinitializer -; ONE: %add8 = add <2 x i32> , -; ONE: %add9 = add <2 x i32> , -; ONE: %add10 = add <2 x i32> , +; ONE: %add5 = add <2 x i32> %arg2, splat (i32 1) +; ONE: %add6 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add7 = add <2 x i32> splat (i32 1), zeroinitializer +; ONE: %add8 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add9 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add10 = add <2 x i32> splat (i32 1), splat (i32 1) ; ZERO: %add0 = add i32 %arg0, 0 diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index c884f83cb4d32dd..11d73ea7c84ad24 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -95,31 +95,31 @@ llvm.mlir.global internal constant @int_gep() : !llvm.ptr { // CHECK{LITERAL}: @dense_float_vector = internal global <3 x float> llvm.mlir.global internal @dense_float_vector(dense<[1.0, 2.0, 3.0]> : vector<3xf32>) : vector<3xf32> -// CHECK{LITERAL}: @splat_float_vector = internal global <3 x float> +// CHECK{LITERAL}: @splat_float_vector = internal global <3 x float> splat (float 4.200000e+01) llvm.mlir.global internal @splat_float_vector(dense<42.0> : vector<3xf32>) : vector<3xf32> // CHECK{LITERAL}: @dense_double_vector = internal global <3 x double> llvm.mlir.global internal @dense_double_vector(dense<[1.0, 2.0, 3.0]> : vector<3xf64>) : vector<3xf64> -// CHECK{LITERAL}: @splat_double_vector = internal global <3 x double> +// CHECK{LITERAL}: @splat_double_vector = internal global <3 x double> splat (double 4.200000e+01) llvm.mlir.global internal @splat_double_vector(dense<42.0> : vector<3xf64>) : vector<3xf64> // CHECK{LITERAL}: @dense_i64_vector = internal global <3 x i64> llvm.mlir.global internal @dense_i64_vector(dense<[1, 2, 3]> : vector<3xi64>) : vector<3xi64> -// CHECK{LITERAL}: @splat_i64_vector = internal global <3 x i64> +// CHECK{LITERAL}: @splat_i64_vector = internal global <3 x i64> splat (i64 42) llvm.mlir.global internal @splat_i64_vector(dense<42> : vector<3xi64>) : vector<3xi64> // CHECK{LITERAL}: @dense_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> , <2 x float> ] llvm.mlir.global internal @dense_float_vector_2d(dense<[[1.0, 2.0], [3.0, 4.0]]> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> -// CHECK{LITERAL}: @splat_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> , <2 x float> ] +// CHECK{LITERAL}: @splat_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)] llvm.mlir.global internal @splat_float_vector_2d(dense<42.0> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> // CHECK{LITERAL}: @dense_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] llvm.mlir.global internal @dense_float_vector_3d(dense<[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> -// CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] +// CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)], [2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)]] llvm.mlir.global internal @splat_float_vector_3d(dense<42.0> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> // @@ -929,7 +929,7 @@ llvm.func @multireturn_caller() { // CHECK-LABEL: define <4 x float> @vector_ops(<4 x float> {{%.*}}, <4 x i1> {{%.*}}, <4 x i64> {{%.*}}) llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>) -> vector<4xf32> { %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32> -// CHECK-NEXT: %4 = fadd <4 x float> %0, +// CHECK-NEXT: %4 = fadd <4 x float> %0, splat (float 4.200000e+01) %1 = llvm.fadd %arg0, %0 : vector<4xf32> // CHECK-NEXT: %5 = select <4 x i1> %1, <4 x float> %4, <4 x float> %0 %2 = llvm.select %arg1, %1, %arg0 : vector<4xi1>, vector<4xf32> @@ -941,9 +941,9 @@ llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4 %5 = llvm.srem %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %9 = urem <4 x i64> %2, %2 %6 = llvm.urem %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %10 = fdiv <4 x float> %0, +// CHECK-NEXT: %10 = fdiv <4 x float> %0, splat (float 4.200000e+01) %7 = llvm.fdiv %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %11 = frem <4 x float> %0, +// CHECK-NEXT: %11 = frem <4 x float> %0, splat (float 4.200000e+01) %8 = llvm.frem %arg0, %0 : vector<4xf32> // CHECK-NEXT: %12 = and <4 x i64> %2, %2 %9 = llvm.and %arg2, %arg2 : vector<4xi64> @@ -991,7 +991,7 @@ llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vector<4 x f32>>> { // CHECK-LABEL: @vector_splat_nonzero llvm.func @vector_splat_nonzero() -> vector<4xf32> { - // CHECK: ret <4 x float> + // CHECK: ret <4 x float> splat (float 1.000000e+00) %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> llvm.return %0 : vector<4xf32> } @@ -1336,7 +1336,7 @@ llvm.func @structconstant() -> !llvm.struct<(i32, f32)> { // CHECK-LABEL: @indexconstantsplat llvm.func @indexconstantsplat() -> vector<3xi32> { %1 = llvm.mlir.constant(dense<42> : vector<3xindex>) : vector<3xi32> - // CHECK: ret <3 x i32> + // CHECK: ret <3 x i32> splat (i32 42) llvm.return %1 : vector<3xi32> }