diff --git a/base/mpfr.jl b/base/mpfr.jl index a53852626f42c..01f3d08c44a26 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -337,9 +337,23 @@ Float64(x::BigFloat, r::RoundingMode) = Float64(x, convert(MPFRRoundingMode, r)) Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) - -# TODO: avoid double rounding -Float16(x::BigFloat) = Float16(Float64(x)) +function Float16(x::BigFloat) :: Float16 + res = Float32(x) + resi = reinterpret(UInt32, res) + if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal + #shift so that the mantissa lines up where it would for normal Float16 + shift = 113-((resi & 0x7f800000)>>23) + if shift<23 + resi |= 0x0080_0000 # set implicit bit + resi >>= shift + end + end + if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values + # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer + res = nextfloat(res, cmp(x, res)) + end + return res +end promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index e566c6b6bd731..fe9bd9dad1366 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -1652,7 +1652,24 @@ extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param) extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param) { - return float_to_half((float)param); + float res = (float)param; + uint32_t resi; + memcpy(&resi, &res, sizeof(res)); + if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal + // shift so that the mantissa lines up where it would for normal Float16 + uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); + if (shift<23u) { + resi |= 0x00800000; // set implicit bit + resi >>= shift; + } + } + if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values + memcpy(&resi, &res, sizeof(res)); + // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer + resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res)); + memcpy(&res, &resi, sizeof(res)); + } + return float_to_half(res); } #endif diff --git a/test/float16.jl b/test/float16.jl index 916032131bf5d..6f04a0817dd1c 100644 --- a/test/float16.jl +++ b/test/float16.jl @@ -199,3 +199,25 @@ const minsubf16_32 = Float32(minsubf16) # issues #33076 @test Float16(1f5) == Inf16 + +@testset "conversion to Float16 from" begin + for T in (Float32, Float64, BigFloat) + @testset "conversion from $T" begin + for i in 1:2^16 + f = reinterpret(Float16, UInt16(i-1)) + isfinite(f) || continue + if f < 0 + epsdown = T(eps(f))/2 + epsup = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2 + else + epsup = T(eps(f))/2 + epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2 + end + @test isequal(f*(-1)^(f === Float16(0)), Float16(nextfloat(T(f) - epsdown))) + @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup))) + @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown))) + @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup))) + end + end + end +end