Skip to content

Commit

Permalink
[X86][EVEX512] Add HasEVEX512 when NoVLX used for 512-bit patterns (
Browse files Browse the repository at this point in the history
#91106)

With KNL/KNC being deprecated, we don't need to care about such no VLX
cases anymore. We may remove such patterns in the future.

Fixes #90844

(cherry picked from commit 7963d9a)
  • Loading branch information
phoebewang authored and tstellar committed May 9, 2024
1 parent 58e44d3 commit 047cd91
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 22 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29841,7 +29841,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return R;

// AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
if ((Subtarget.hasVLX() ||
(Subtarget.hasAVX512() && Subtarget.hasEVEX512())) &&
32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
if (IsCstSplat) {
unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
Expand Down
42 changes: 21 additions & 21 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
(v2i64 (VEXTRACTI128rr
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
Expand Down Expand Up @@ -3068,7 +3068,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

Expand Down Expand Up @@ -3099,7 +3099,7 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

Expand Down Expand Up @@ -3493,7 +3493,7 @@ multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
Expand All @@ -3505,7 +3505,7 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

Expand Down Expand Up @@ -4998,8 +4998,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
SchedWriteVecALU, HasAVX512, 1>, T8;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasDQI, NoVLX] in {
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
Expand Down Expand Up @@ -5055,7 +5055,7 @@ multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
Expand Down Expand Up @@ -6032,7 +6032,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
Expand Down Expand Up @@ -6161,14 +6161,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecS
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;


// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
Expand Down Expand Up @@ -6219,7 +6219,7 @@ let Predicates = [HasAVX512, NoVLX] in {
}

// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
Expand Down Expand Up @@ -9816,7 +9816,7 @@ defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
Expand All @@ -9827,7 +9827,7 @@ def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
Expand Down Expand Up @@ -10370,7 +10370,7 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
let Predicates = [prd, NoVLX, HasEVEX512] in {
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
}
Expand Down Expand Up @@ -11157,7 +11157,7 @@ defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
Expand All @@ -11173,7 +11173,7 @@ let Predicates = [HasAVX512, NoVLX] in {
// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd, NoVLX] in {
let Predicates = [prd, NoVLX, HasEVEX512] in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
Expand Down Expand Up @@ -11792,7 +11792,7 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def : Pat<(v16i8 (vnot VR128X:$src)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/pr90844.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s

define void @PR90844() {
; CHECK-LABEL: PR90844:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%rax)
; CHECK-NEXT: retq
entry:
%0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>)
%1 = and <2 x i32> %0, <i32 16711935, i32 -134152448>
%2 = or disjoint <2 x i32> zeroinitializer, %1
%3 = zext <2 x i32> %2 to <2 x i64>
%4 = shl nuw <2 x i64> %3, <i64 32, i64 32>
%5 = or disjoint <2 x i64> %4, zeroinitializer
store <2 x i64> %5, ptr poison, align 16
ret void
}

0 comments on commit 047cd91

Please sign in to comment.