diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c4cd2a672fe7b2..bc9368f327d0c6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -56903,7 +56903,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v8f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR128XRegClass); + return std::make_pair(0U, &X86::VR128RegClass); + case MVT::v8bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR128XRegClass); + return std::make_pair(0U, &X86::VR128RegClass); case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -56918,7 +56926,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v16f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR256XRegClass); + return std::make_pair(0U, &X86::VR256RegClass); + case MVT::v16bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR256XRegClass); + return std::make_pair(0U, &X86::VR256RegClass); case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -56933,7 +56949,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v32f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + if (VConstraint) + return std::make_pair(0U, &X86::VR512RegClass); + return std::make_pair(0U, &X86::VR512_0_15RegClass); + case MVT::v32bf16: + if (!Subtarget.hasBF16()) + break; + if (VConstraint) + return std::make_pair(0U, &X86::VR512RegClass); + return std::make_pair(0U, &X86::VR512_0_15RegClass); case MVT::v64i8: case MVT::v32i16: case MVT::v8f64: @@ -56976,7 +57000,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v8f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::XMM0, &X86::VR128RegClass); + case MVT::v8bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + return std::make_pair(X86::XMM0, &X86::VR128RegClass); case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -56989,7 +57017,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v16f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::YMM0, &X86::VR256RegClass); + case MVT::v16bf16: + if (!Subtarget.hasBF16() || !Subtarget.hasVLX()) + break; + return std::make_pair(X86::YMM0, &X86::VR256RegClass); case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -57002,7 +57034,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::v32f16: if (!Subtarget.hasFP16()) break; - [[fallthrough]]; + return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); + case MVT::v32bf16: + if (!Subtarget.hasBF16()) + break; + return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); case MVT::v64i8: case MVT::v32i16: case MVT::v8f64: diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll index fcea55c47cd3ec..e153387d16e72b 100644 --- a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll @@ -1,7 +1,7 @@ ; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -stop-after=finalize-isel > %t 2> %t.err ; RUN: FileCheck < %t %s ; RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -stop-after=finalize-isel | FileCheck --check-prefixes=CHECK,FP16 %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bf16,avx512fp16 -stop-after=finalize-isel | FileCheck --check-prefixes=CHECK,FP16 %s ; CHECK-LABEL: name: mask_Yk_i8 ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1 @@ -24,3 +24,14 @@ entry: %0 = tail call <32 x half> asm "vaddph\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <32 x half> %x, <32 x half> %y) ret <32 x half> %0 } + +; FP16-LABEL: name: mask_Yk_bf16 +; FP16: %[[REG1:.*]]:vr512_0_15 = COPY %1 +; FP16: %[[REG2:.*]]:vr512_0_15 = COPY %2 +; FP16: INLINEASM &"vaddph\09$3, $2, $0 {$1}", 0 /* attdialect */, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags +; CHECK-STDERR: couldn't allocate output register for constraint 'x' +define <32 x bfloat> @mask_Yk_bf16(i8 signext %msk, <32 x bfloat> %x, <32 x bfloat> %y) { +entry: + %0 = tail call <32 x bfloat> asm "vaddph\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <32 x bfloat> %x, <32 x bfloat> %y) + ret <32 x bfloat> %0 +}