Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP/RFC/Scary: allow signed Normed numbers #143

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Manifest.toml
25 changes: 16 additions & 9 deletions src/FixedPointNumbers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,25 @@ typemin(::Type{T}) where {T <: FixedPoint} = T(typemin(rawtype(T)), 0)
floatmin(::Type{T}) where {T <: FixedPoint} = eps(T)
floatmax(::Type{T}) where {T <: FixedPoint} = typemax(T)

widen1(::Type{Int8}) = Int16
widen1(::Type{UInt8}) = UInt16
widen1(::Type{Int16}) = Int32
widen1(::Type{UInt16}) = UInt32
widen1(::Type{Int32}) = Int64
widen1(::Type{UInt32}) = UInt64
widen1(::Type{Int64}) = Int128
widen1(::Type{UInt64}) = UInt128
widen1(::Type{Int128}) = Int128
widen1(::Type{Int8}) = Int16
widen1(::Type{UInt8}) = UInt16
widen1(::Type{Int16}) = Int32
widen1(::Type{UInt16}) = UInt32
widen1(::Type{Int32}) = Int64
widen1(::Type{UInt32}) = UInt64
widen1(::Type{Int64}) = Int128
widen1(::Type{UInt64}) = UInt128
widen1(::Type{Int128}) = Int128
widen1(::Type{UInt128}) = UInt128
widen1(x::Integer) = x % widen1(typeof(x))

signedwiden1(::Type{UInt8}) = Int16
signedwiden1(::Type{UInt16}) = Int32
signedwiden1(::Type{UInt32}) = Int64
signedwiden1(::Type{UInt64}) = Int128
signedwiden1(::Type{UInt128}) = Int128
signedwiden1(x::Integer) = x % signedwiden1(typeof(x))

const ShortInts = Union{Int8,UInt8,Int16,UInt16}
const LongInts = Union{UInt64, UInt128, Int64, Int128, BigInt}

Expand Down
170 changes: 93 additions & 77 deletions src/normed.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
# Normed{T,f} maps UInts from 0 to 2^f-1 to the range [0.0, 1.0]
# Normed{T,f} maps Integers from 0 to 2^f-1 to the range [0.0, 1.0]
# For example, Normed{UInt8,8} == N0f8 maps 0x00 to 0.0 and 0xff to 1.0

struct Normed{T<:Unsigned,f} <: FixedPoint{T,f}
struct Normed{T<:Integer,f} <: FixedPoint{T,f}
i::T

Normed{T, f}(i::Integer,_) where {T,f} = new{T, f}(i%T) # for setting by raw representation
function Normed{T, f}(i::Integer,_) where {T,f} # 2-arg form for setting by raw representation
isa(f, Int) || error("f must be an Int")
0 < f <= sizeof(T)*8 - (T<:Signed) || error("f must be between 1 and the number of non-sign bits")
new{T, f}(i%T)
end
end

Normed{T, f}(x::AbstractChar) where {T,f} = throw(ArgumentError("Normed cannot be constructed from a Char"))
Normed{T, f}(x::Complex) where {T,f} = Normed{T, f}(convert(real(typeof(x)), x))
Normed{T, f}(x::Base.TwicePrecision) where {T,f} = Normed{T, f}(convert(Float64, x))
Normed{T1,f}(x::Normed{T2,f}) where {T1 <: Unsigned,T2 <: Unsigned,f} = Normed{T1,f}(convert(T1, x.i), 0)

typechar(::Type{X}) where {X <: Normed} = 'N'
signbits(::Type{X}) where {X <: Normed} = 0
Normed{T1,f}(x::Normed{T2,f}) where {T1 <: Integer,T2 <: Integer,f} = Normed{T1,f}(convert(T1, x.i), 0)

typechar(::Type{X}) where X <: Normed{T} where T<:Unsigned = 'N'
typechar(::Type{X}) where X <: Normed{T} where T<:Signed = 'S'
signbits(::Type{X}) where X <: Normed{T} where T<:Unsigned = 0
signbits(::Type{X}) where X <: Normed{T} where T<:Signed = 1

I64(::Type{<:Unsigned}) = UInt64
I64(::Type{<:Signed}) = Int64
I32(::Type{<:Unsigned}) = UInt32
I32(::Type{<:Signed}) = Int32

for T in (UInt8, UInt16, UInt32, UInt64)
for f in 1:sizeof(T)*8
for T in (UInt8, UInt16, UInt32, UInt64, Int16, Int32, Int64)
for f in 1:sizeof(T)*8-(T<:Signed)
sym = Symbol(String(take!(showtype(_iotypealias, Normed{T,f}))))
@eval begin
const $sym = Normed{$T,$f}
Expand All @@ -25,77 +37,82 @@ for T in (UInt8, UInt16, UInt32, UInt64)
end
end

reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Unsigned,f} = Normed{T,f}(x, 0)
reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Integer,f} = Normed{T,f}(x, 0)

zero(::Type{Normed{T,f}}) where {T,f} = Normed{T,f}(zero(T),0)
function oneunit(::Type{T}) where {T <: Normed}
T(typemax(rawtype(T)) >> (8*sizeof(T)-nbitsfrac(T)), 0)
zero(::Type{Normed{T,f}}) where {T <: Integer,f} = Normed{T,f}(zero(T),0)
function oneunit(::Type{N}) where N <: Normed{T,f} where {T,f}
N(typemax(rawtype(N)) >> (8*sizeof(N)-nbitsfrac(N)-(T<:Signed)), 0)
end
one(::Type{T}) where {T <: Normed} = oneunit(T)
zero(x::Normed) = zero(typeof(x))
oneunit(x::Normed) = one(typeof(x))
one(x::Normed) = oneunit(x)
rawone(v) = reinterpret(one(v))

# Conversions
function Normed{T,f}(x::Normed{T2}) where {T <: Unsigned,T2 <: Unsigned,f}
# More construction and conversion
function Normed{T,f}(x::Normed{T2}) where {T <: Integer,T2 <: Integer,f}
U = Normed{T,f}
y = round((rawone(U)/rawone(x))*reinterpret(x))
(0 <= y) & (y <= typemax(T)) || throw_converterror(U, x)
(typemin(T) <= y) & (y <= typemax(T)) || throw_converterror(U, x)
reinterpret(U, _unsafe_trunc(T, y))
end
N0f16(x::N0f8) = reinterpret(N0f16, convert(UInt16, 0x0101*reinterpret(x)))

(::Type{U})(x::Real) where {U <: Normed} = _convert(U, x)
(::Type{N})(x::Real) where {N <: Normed} = _convert(N, x)

function _convert(::Type{U}, x) where {T, f, U <: Normed{T,f}}
if T == UInt128 # for UInt128, we can't widen
function _convert(::Type{N}, x) where {T <: Integer, f, N <: Normed{T,f}}
if T === UInt128 || T === Int128 # for [U]Int128, we can't widen
# the upper limit is not exact
(0 <= x) & (x <= (typemax(T)/rawone(U))) || throw_converterror(U, x)
y = round(rawone(U)*x)
((typemin(T)/rawone(N)) <= x) & (x <= (typemax(T)/rawone(N))) || throw_converterror(N, x)
y = round(rawone(N)*x)
else
y = round(widen1(rawone(U))*x)
(0 <= y) & (y <= typemax(T)) || throw_converterror(U, x)
y = round(widen1(rawone(N))*x)
(typemin(T) <= y) & (y <= typemax(T)) || throw_converterror(N, x)
end
reinterpret(U, _unsafe_trunc(T, y))
reinterpret(N, _unsafe_trunc(T, y))
end
# Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
function _convert(::Type{U}, x::Float16) where {T, f, U <: Normed{T,f}}
if Float16(typemax(T)/rawone(U)) > Float32(typemax(T)/rawone(U))
x == Float16(typemax(T)/rawone(U)) && return typemax(U)
function _convert(::Type{N}, x::Float16) where {T <: Integer, f, N <: Normed{T,f}}
if Float16(typemax(T)/rawone(N)) > Float32(typemax(T)/rawone(N))
x == Float16(typemax(T)/rawone(N)) && return typemax(N)
if T <: Signed
x == Float16(typemin(T)/rawone(N)) && return typemin(N)
end
end
return _convert(U, Float32(x))
return _convert(N, Float32(x))
end
function _convert(::Type{U}, x::Tf) where {T, f, U <: Normed{T,f}, Tf <: Union{Float32, Float64}}
if T == UInt128 && f == 53
0 <= x <= Tf(3.777893186295717e22) || throw_converterror(U, x)
function _convert(::Type{N}, x::Tf) where {T <: Integer, f, N <: Normed{T,f}, Tf <: Union{Float32, Float64}}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry but this method is currently optimized for non-negative numbers and cannot handle negative numbers. (pay attention to k.) Of course, this is a trivial issue, but I mean, "optimization is specialization".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I hadn't checked any of this, this is WIP.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't care about that much in FixedPointNumbers. However, it is generally not good to ask other packages for such verification and modification. If this is essential to the design, we should change it aggressively, but I don't think so.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand. All I meant was that this was a draft I decided to let people know I was working on. I will fix this before we merge this if we decide to do so.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a simple problem. The current Normed is always unsigned, but your Normed is not always unsigned.

if T === UInt128 && f == 53
Tf(0) <= x <= Tf(3.777893186295717e22) || throw_converterror(N, x)
elseif T === Int128 && f == 53
Tf(-1.888946593147859e22) <= x <= Tf(1.888946593147859e22) || throw_converterror(N, x)
else
0 <= x <= Tf((typemax(T)-rawone(U))/rawone(U)+1) || throw_converterror(U, x)
Tf((typemin(T)+rawone(N))/rawone(N)-1) <= x <= Tf((typemax(T)-rawone(N))/rawone(N)+1) || throw_converterror(N, x)
end

significand_bits = Tf == Float64 ? 52 : 23
significand_bits = Tf === Float64 ? 52 : 23
if f <= (significand_bits + 1) && sizeof(T) * 8 < significand_bits
return reinterpret(U, unsafe_trunc(T, round(rawone(U) * x)))
return reinterpret(N, unsafe_trunc(T, round(rawone(N) * x)))
end
# cf. the implementation of `frexp`
Tw = f < sizeof(T) * 8 ? T : widen1(T)
bits = sizeof(Tw) * 8 - 1
xu = reinterpret(Tf == Float64 ? UInt64 : UInt32, x)
xu = reinterpret(Tf === Float64 ? I64(T) : I32(T), x)
k = Int(xu >> significand_bits)
k == 0 && return zero(U) # neglect subnormal numbers
k == 0 && return zero(N) # neglect subnormal numbers
significand = xu | (one(xu) << significand_bits)
yh = unsafe_trunc(Tw, significand) << (bits - significand_bits)
exponent_bias = Tf == Float64 ? 1023 : 127
ex = exponent_bias - k + bits - f
yi = bits >= f ? yh - (yh >> f) : yh
if ex <= 0
ex == 0 && return reinterpret(U, unsafe_trunc(T, yi))
ex != -1 || signbit(signed(yi)) && return typemax(U)
return reinterpret(U, unsafe_trunc(T, yi + yi))
ex == 0 && return reinterpret(N, unsafe_trunc(T, yi))
ex != -1 || signbit(signed(yi)) && return typemax(N)
return reinterpret(N, unsafe_trunc(T, yi + yi))
end
ex > bits && return reinterpret(U, ex == bits + 1 ? one(T) : zero(T))
ex > bits && return reinterpret(N, ex == bits + 1 ? one(T) : zero(T))
yi += one(Tw)<<((ex - 1) & bits) # RoundNearestTiesUp
return reinterpret(U, unsafe_trunc(T, yi >> (ex & bits)))
return reinterpret(N, unsafe_trunc(T, yi >> (ex & bits)))
end

rem(x::T, ::Type{T}) where {T <: Normed} = x
Expand Down Expand Up @@ -125,14 +142,18 @@ function (::Type{T})(x::Normed) where {T <: AbstractFloat}
convert(T, y) # needed for types like Float16 which promote arithmetic to Float32
end

function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32}, f}
# A slightly faster copysign (one that avoids type-piracy)
setsign(x::Float32, i::UInt32) = x
setsign(x::Float32, i::Int32) = reinterpret(Float32, reinterpret(UInt32, x) | (reinterpret(UInt32, i) & reinterpret(UInt32, typemin(Int32))))

function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32, Int8, Int16, Int32}, f}
f == 1 ? Float16(x.i) : Float16(Float32(x))
end
function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128}, f}
function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128, Int64, Int128}, f}
f == 1 ? Float16(x.i) : Float16(Float64(x))
end

function Base.Float32(x::Normed{UInt8,f}) where f
function Base.Float32(x::Normed{<:Union{UInt8,Int8},f}) where f
f == 1 && return Float32(x.i)
f == 2 && return Float32(Int32(x.i) * 0x101) * @f32(0x550055p-32)
f == 3 && return Float32(Int32(x.i) * 0x00b) * @f32(0xd4c77bp-30)
Expand All @@ -143,7 +164,7 @@ function Base.Float32(x::Normed{UInt8,f}) where f
f == 8 && return Float32(Int32(x.i) * 0x155) * @f32(0xc0f0fdp-40)
0.0f0
end
function Base.Float32(x::Normed{UInt16,f}) where f
function Base.Float32(x::Normed{<:Union{UInt16,Int16},f}) where f
f32 = Float32(x.i)
f == 1 && return f32
f == 2 && return f32 * @f32(0x55p-8) + f32 * @f32(0x555555p-32)
Expand All @@ -155,19 +176,19 @@ function Base.Float32(x::Normed{UInt16,f}) where f
f == 16 && return f32 * @f32(0x01p-16) + f32 * @f32(0x010001p-48)
Float32(x.i / rawone(x))
end
function Base.Float32(x::Normed{UInt32,f}) where f
function Base.Float32(x::Normed{T,f}) where {T <: Union{UInt32,Int32}, f}
f == 1 && return Float32(x.i)
i32 = unsafe_trunc(Int32, x.i)
if f == 32
rh, rl = Float32(i32>>>16), Float32((i32&0xFFFF)<<8 | (i32>>>24))
return muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40))
return setsign(muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40)), x.i)
elseif f >= 25
rh, rl = Float32(i32>>>16),Float32(((i32&0xFFFF)<<14) + (i32>>>(f-14)))
return muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f)))
return setsign(muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f))), x.i)
end
# FIXME: avoid the branch in native x86_64 (non-SIMD) codes
m = ifelse(i32 < 0, 0x1p32 * inv_rawone(x), 0.0)
Float32(muladd(Float64(i32), inv_rawone(x), m))
return setsign(Float32(muladd(Float64(i32), inv_rawone(x), m)), x.i)
end
function Base.Float32(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128}, f}
f == 1 ? Float32(x.i) : Float32(Float64(x))
Expand All @@ -176,7 +197,11 @@ end
function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16}, f}
Float64(Normed{UInt32,f}(x))
end
function Base.Float64(x::Normed{UInt32,f}) where f
function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{Int8, Int16}, f}
Float64(Normed{Int32,f}(x))
end

function Base.Float64(x::Normed{<:Union{UInt32,Int32},f}) where f
f64 = Float64(x.i)
f == 1 && return f64
f == 2 && return (f64 * 0x040001) * 0x15555000015555p-72
Expand Down Expand Up @@ -206,7 +231,7 @@ function Base.Float64(x::Normed{UInt32,f}) where f
f == 32 && return (f64 * 0x010101) * 0x00ff0000ffff01p-96
f64 / rawone(x)
end
function Base.Float64(x::Normed{UInt64,f}) where f
function Base.Float64(x::Normed{UInt64,f}) where f # TODO: optimized version for Int64
f == 1 && return Float64(x.i)
if f >= 53
rh = Float64(unsafe_trunc(Int64, x.i >> 16)) * @exp2(16-f) # upper 48 bits
Expand All @@ -215,7 +240,7 @@ function Base.Float64(x::Normed{UInt64,f}) where f
end
x.i / rawone(x)
end
function Base.Float64(x::Normed{UInt128,f}) where f
function Base.Float64(x::Normed{UInt128,f}) where f # TODO: optimized version for Int128
f == 1 && return Float64(x.i)
ih, il = unsafe_trunc(Int64, x.i>>64), unsafe_trunc(Int64, x.i)
rh = Float64(ih>>>16) * @exp2(f <= 53 ? 80 : 80 - f) # upper 48 bits
Expand All @@ -241,7 +266,8 @@ Base.Rational{Ti}(x::Normed) where {Ti <: Integer} = convert(Ti, reinterpret(x))
Base.Rational(x::Normed) = reinterpret(x)//rawone(x)

# Traits
abs(x::Normed) = x
abs(x::Normed{<:Unsigned}) = x
abs(x::T) where T<:Normed = T(abs(x.i), 0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does not abs(::Normed{<:Signed}) return Normed{<:Unsigned}? (Of course, this is not a question.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try this: abs(Int8(-5)). In general Julia doesn't change the number type, even to the point where abs(typemin(Int8)) returns a negative number!

Copy link
Collaborator

@kimikage kimikage Nov 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right. Therefore, it can be concluded that -(x::N0f8, y::N0f8) = S7f8 (Int16(x.i) - Int16(y.i), 0) is not good! 😝

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, this is in conflict with the rest of Julia. I guess we could return Normed{<:Unsigned}, since the only other "always agrees with expectations" is to widen which also changes the type.


(-)(x::T) where {T <: Normed} = T(-reinterpret(x), 0)
(~)(x::T) where {T <: Normed} = T(~reinterpret(x), 0)
Expand All @@ -257,48 +283,38 @@ abs(x::Normed) = x

# Functions
trunc(x::T) where {T <: Normed} = T(div(reinterpret(x), rawone(T))*rawone(T),0)
floor(x::T) where {T <: Normed} = trunc(x)
floor(x::Normed{T}) where {T <: Unsigned} = trunc(x)
function floor(x::Normed{T}) where {T <: Signed}
d, r = divrem(reinterpret(x), rawone(x))
return typeof(x)((d - signbit(r))*rawone(x), 0)
end
function round(x::Normed{T,f}) where {T,f}
mask = convert(T, 1<<(f-1))
y = trunc(x)
return convert(T, reinterpret(x)-reinterpret(y)) & mask>0 ?
Normed{T,f}(y+oneunit(Normed{T,f})) : y
d, r = divrem(reinterpret(x), rawone(x))
return Normed{T,f}((d + r>>(f-1))*rawone(x), 0)
end
function ceil(x::Normed{T,f}) where {T,f}
k = 8*sizeof(T)-f
mask = (typemax(T)<<k)>>k
y = trunc(x)
return convert(T, reinterpret(x)-reinterpret(y)) & (mask)>0 ?
Normed{T,f}(y+oneunit(Normed{T,f})) : y
d, r = divrem(reinterpret(x), rawone(x))
return Normed{T,f}((d + (r>zero(r)))*rawone(x), 0)
end

trunc(::Type{T}, x::Normed) where {T <: Integer} = convert(T, div(reinterpret(x), rawone(x)))
round(::Type{T}, x::Normed) where {T <: Integer} = round(T, reinterpret(x)/rawone(x))
floor(::Type{T}, x::Normed) where {T <: Integer} = trunc(T, x)
ceil(::Type{T}, x::Normed) where {T <: Integer} = ceil(T, reinterpret(x)/rawone(x))
round(::Type{T}, x::Normed) where {T <: Integer} = round(T, float(x))
floor(::Type{T}, x::Normed{<:Unsigned}) where {T <: Integer} = trunc(T, x)
floor(::Type{T}, x::Normed{<:Signed}) where {T <: Integer} = floor(T, float(x))
ceil(::Type{T}, x::Normed) where {T <: Integer} = ceil(T, float(x))

isfinite(x::Normed) = true
isnan(x::Normed) = false
isinf(x::Normed) = false

bswap(x::Normed{UInt8,f}) where {f} = x
bswap(x::Normed) = typeof(x)(bswap(reinterpret(x)),0)
bswap(x::Normed{<:Union{UInt8,Int8},f}) where {f} = x
bswap(x::Normed) = typeof(x)(bswap(reinterpret(x)), 0)

function minmax(x::T, y::T) where {T <: Normed}
a, b = minmax(reinterpret(x), reinterpret(y))
T(a,0), T(b,0)
end

# Iteration
# The main subtlety here is that iterating over N0f8(0):N0f8(1) will wrap around
# unless we iterate using a wider type
@inline start(r::StepRange{T}) where {T <: Normed} = widen1(reinterpret(r.start))
@inline next(r::StepRange{T}, i::Integer) where {T <: Normed} = (T(i,0), i+reinterpret(r.step))
@inline function done(r::StepRange{T}, i::Integer) where {T <: Normed}
i1, i2 = reinterpret(r.start), reinterpret(r.stop)
isempty(r) | (i < min(i1, i2)) | (i > max(i1, i2))
end

function decompose(x::Normed)
g = gcd(reinterpret(x), rawone(x))
div(reinterpret(x),g), 0, div(rawone(x),g)
Expand Down
Loading