From 3c4cc92d88bb7293926351eb0ddc48bce2751e99 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 13 Aug 2024 20:30:40 +0200 Subject: [PATCH] Demote(B)Float16 pass: only keep enabled for PPC. --- src/llvm-demote-float16.cpp | 47 ++++++++++-------------- test/llvmpasses/fastmath.jl | 26 -------------- test/llvmpasses/float16.ll | 71 +++++++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 57 deletions(-) diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 5d0d9f5d37c40..7f1b076897fc8 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -49,37 +49,28 @@ extern JuliaOJIT *jl_ExecutionEngine; namespace { -static bool have_fp16(Function &caller, const Triple &TT) { - Attribute FSAttr = caller.getFnAttribute("target-features"); - StringRef FS = ""; - if (FSAttr.isValid()) - FS = FSAttr.getValueAsString(); - else if (jl_ExecutionEngine) - FS = jl_ExecutionEngine->getTargetFeatureString(); - // else probably called from opt, just do nothing - if (TT.isAArch64()) { - if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){ - return true; - } - } else if (TT.getArch() == Triple::x86_64) { - if (FS.find("+avx512fp16") != llvm::StringRef::npos){ - return true; - } - } - if (caller.hasFnAttribute("julia.hasfp16")) { - return true; - } - return false; +static bool have_fp16(Function &F, const Triple &TT) { + // for testing purposes + Attribute Attr = F.getFnAttribute("julia.hasfp16"); + if (Attr.isValid()) + return Attr.getValueAsBool(); + + // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision + if (TT.isPPC()) + return false; + + return true; } -static bool have_bf16(Function &caller, const Triple &TT) { - if (caller.hasFnAttribute("julia.hasbf16")) { - return true; - } +static bool have_bf16(Function &F, const Triple &TT) { + // for testing purposes + Attribute Attr = F.getFnAttribute("julia.hasbf16"); + if (Attr.isValid()) + return Attr.getValueAsBool(); - // there's no targets that fully support bfloat yet;, - // AVX512BF16 only provides conversion and dot product instructions. - return false; + // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199: + // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf + return true; } static bool demoteFloat16(Function &F) diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index dd0892be56a0b..3c4c1d491ec28 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -16,29 +16,3 @@ import Base.FastMath # CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32") emit(FastMath.sqrt_fast, Float32) - - -# Float16 operations should be performed as Float32, unless @fastmath is specified -# TODO: this is not true for platforms that natively support Float16 - -foo(x::T,y::T) where T = x-y == zero(T) -# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{ -# CHECK-DAG: %[[XEXT:[0-9]+]] = fpext half %[[X]] to float -# CHECK-DAG: %[[YEXT:[0-9]+]] = fpext half %[[Y]] to float -# CHECK: %[[DIFF:[0-9]+]] = fsub float %[[XEXT]], %[[YEXT]] -# CHECK: %[[TRUNC:[0-9]+]] = fptrunc float %[[DIFF]] to half -# CHECK: %[[DIFFEXT:[0-9]+]] = fpext half %[[TRUNC]] to float -# CHECK: %[[CMP:[0-9]+]] = fcmp oeq float %[[DIFFEXT]], 0.000000e+00 -# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8 -# CHECK: ret i8 %[[ZEXT]] -# CHECK: } -emit(foo, Float16, Float16) - -@fastmath foo(x::T,y::T) where T = x-y == zero(T) -# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{ -# CHECK: %[[DIFF:[0-9]+]] = fsub fast half %[[X]], %[[Y]] -# CHECK: %[[CMP:[0-9]+]] = fcmp fast oeq half %[[DIFF]], 0xH0000 -# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8 -# CHECK: ret i8 %[[ZEXT]] -# CHECK: } -emit(foo, Float16, Float16) diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll index 33069c71179ed..d1dfb6aca11dd 100644 --- a/test/llvmpasses/float16.ll +++ b/test/llvmpasses/float16.ll @@ -99,7 +99,7 @@ top: ret half %13 } -define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) { +define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 { top: ; CHECK-LABEL: @demote_bfloat_test( ; CHECK-NEXT: top: @@ -160,5 +160,70 @@ top: ret bfloat %13 } -attributes #0 = { "target-features"="-avx512fp16" } -attributes #1 = { "target-features"="+avx512fp16" } +define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 { +top: +; CHECK-LABEL: @native_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %0 = fadd bfloat %a, %b +; CHECK-NEXT: %1 = fadd bfloat %0, %b +; CHECK-NEXT: %2 = fadd bfloat %1, %b +; CHECK-NEXT: %3 = fmul bfloat %2, %b +; CHECK-NEXT: %4 = fdiv bfloat %3, %b +; CHECK-NEXT: %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 +; CHECK-NEXT: %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 +; CHECK-NEXT: %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 +; CHECK-NEXT: %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 +; CHECK-NEXT: %9 = fadd <2 x bfloat> %6, %8 +; CHECK-NEXT: %10 = extractelement <2 x bfloat> %9, i32 0 +; CHECK-NEXT: %11 = extractelement <2 x bfloat> %9, i32 1 +; CHECK-NEXT: %12 = fadd bfloat %10, %11 +; CHECK-NEXT: %13 = fadd bfloat %12, %4 +; CHECK-NEXT: ret bfloat %13 +; + %0 = fadd bfloat %a, %b + %1 = fadd bfloat %0, %b + %2 = fadd bfloat %1, %b + %3 = fmul bfloat %2, %b + %4 = fdiv bfloat %3, %b + %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 + %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 + %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 + %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 + %9 = fadd <2 x bfloat> %6, %8 + %10 = extractelement <2 x bfloat> %9, i32 0 + %11 = extractelement <2 x bfloat> %9, i32 1 + %12 = fadd bfloat %10, %11 + %13 = fadd bfloat %12, %4 + ret bfloat %13 +} + +define i1 @fast_half_test(half %0, half %1) #0 { +top: +; CHECK-LABEL: @fast_half_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %2 = fsub fast half %0, %1 +; CHECK-NEXT: %3 = fcmp fast oeq half %2, 0xH0000 +; CHECK-NEXT: ret i1 %3 +; + %2 = fsub fast half %0, %1 + %3 = fcmp fast oeq half %2, 0xH0000 + ret i1 %3 +} + +define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 { +top: +; CHECK-LABEL: @fast_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %2 = fsub fast bfloat %0, %1 +; CHECK-NEXT: %3 = fcmp fast oeq bfloat %2, 0xR0000 +; CHECK-NEXT: ret i1 %3 +; + %2 = fsub fast bfloat %0, %1 + %3 = fcmp fast oeq bfloat %2, 0xR0000 + ret i1 %3 +} + +attributes #0 = { "julia.hasfp16"="false" } +attributes #1 = { "julia.hasfp16"="true" } +attributes #2 = { "julia.hasbf16"="false" } +attributes #3 = { "julia.hasbf16"="true" }