JuliaMath · timholy · Nov 27, 2019 · kimikage · Nov 28, 2019 · timholy
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+Manifest.toml
diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl
@@ -60,18 +60,25 @@ typemin(::Type{T}) where {T <: FixedPoint} = T(typemin(rawtype(T)), 0)
 floatmin(::Type{T}) where {T <: FixedPoint} = eps(T)
 floatmax(::Type{T}) where {T <: FixedPoint} = typemax(T)
 
-widen1(::Type{Int8})   = Int16
-widen1(::Type{UInt8})  = UInt16
-widen1(::Type{Int16})  = Int32
-widen1(::Type{UInt16}) = UInt32
-widen1(::Type{Int32})  = Int64
-widen1(::Type{UInt32}) = UInt64
-widen1(::Type{Int64})  = Int128
-widen1(::Type{UInt64}) = UInt128
-widen1(::Type{Int128}) = Int128
+widen1(::Type{Int8})    = Int16
+widen1(::Type{UInt8})   = UInt16
+widen1(::Type{Int16})   = Int32
+widen1(::Type{UInt16})  = UInt32
+widen1(::Type{Int32})   = Int64
+widen1(::Type{UInt32})  = UInt64
+widen1(::Type{Int64})   = Int128
+widen1(::Type{UInt64})  = UInt128
+widen1(::Type{Int128})  = Int128
 widen1(::Type{UInt128}) = UInt128
 widen1(x::Integer) = x % widen1(typeof(x))
 
+signedwiden1(::Type{UInt8})   = Int16
+signedwiden1(::Type{UInt16})  = Int32
+signedwiden1(::Type{UInt32})  = Int64
+signedwiden1(::Type{UInt64})  = Int128
+signedwiden1(::Type{UInt128}) = Int128
+signedwiden1(x::Integer) = x % signedwiden1(typeof(x))
+
 const ShortInts = Union{Int8,UInt8,Int16,UInt16}
 const LongInts = Union{UInt64, UInt128, Int64, Int128, BigInt}
 

diff --git a/src/normed.jl b/src/normed.jl
@@ -1,22 +1,34 @@
-# Normed{T,f} maps UInts from 0 to 2^f-1 to the range [0.0, 1.0]
+# Normed{T,f} maps Integers from 0 to 2^f-1 to the range [0.0, 1.0]
 # For example, Normed{UInt8,8} == N0f8 maps 0x00 to 0.0 and 0xff to 1.0
 
-struct Normed{T<:Unsigned,f} <: FixedPoint{T,f}
+struct Normed{T<:Integer,f} <: FixedPoint{T,f}
     i::T
 
-    Normed{T, f}(i::Integer,_) where {T,f} = new{T, f}(i%T)   # for setting by raw representation
+    function Normed{T, f}(i::Integer,_) where {T,f}    # 2-arg form for setting by raw representation
+        isa(f, Int) || error("f must be an Int")
+        0 < f <= sizeof(T)*8 - (T<:Signed) || error("f must be between 1 and the number of non-sign bits")
+        new{T, f}(i%T)
+    end
 end
 
 Normed{T, f}(x::AbstractChar) where {T,f} = throw(ArgumentError("Normed cannot be constructed from a Char"))
 Normed{T, f}(x::Complex) where {T,f} = Normed{T, f}(convert(real(typeof(x)), x))
 Normed{T, f}(x::Base.TwicePrecision) where {T,f} = Normed{T, f}(convert(Float64, x))
-Normed{T1,f}(x::Normed{T2,f}) where {T1 <: Unsigned,T2 <: Unsigned,f} = Normed{T1,f}(convert(T1, x.i), 0)
 
-typechar(::Type{X}) where {X <: Normed} = 'N'
-signbits(::Type{X}) where {X <: Normed} = 0
+Normed{T1,f}(x::Normed{T2,f}) where {T1 <: Integer,T2 <: Integer,f} = Normed{T1,f}(convert(T1, x.i), 0)
+
+typechar(::Type{X}) where X <: Normed{T} where T<:Unsigned = 'N'
+typechar(::Type{X}) where X <: Normed{T} where T<:Signed   = 'S'
+signbits(::Type{X}) where X <: Normed{T} where T<:Unsigned = 0
+signbits(::Type{X}) where X <: Normed{T} where T<:Signed   = 1
+
+I64(::Type{<:Unsigned}) = UInt64
+I64(::Type{<:Signed})   = Int64
+I32(::Type{<:Unsigned}) = UInt32
+I32(::Type{<:Signed})   = Int32
 
-for T in (UInt8, UInt16, UInt32, UInt64)
-    for f in 1:sizeof(T)*8
+for T in (UInt8, UInt16, UInt32, UInt64, Int16, Int32, Int64)
+    for f in 1:sizeof(T)*8-(T<:Signed)
         sym = Symbol(String(take!(showtype(_iotypealias, Normed{T,f}))))
         @eval begin
             const $sym = Normed{$T,$f}
@@ -25,77 +37,82 @@ for T in (UInt8, UInt16, UInt32, UInt64)
     end
 end
 
-reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Unsigned,f} = Normed{T,f}(x, 0)
+reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Integer,f} = Normed{T,f}(x, 0)
 
-zero(::Type{Normed{T,f}}) where {T,f} = Normed{T,f}(zero(T),0)
-function oneunit(::Type{T}) where {T <: Normed}
-    T(typemax(rawtype(T)) >> (8*sizeof(T)-nbitsfrac(T)), 0)
+zero(::Type{Normed{T,f}}) where {T <: Integer,f} = Normed{T,f}(zero(T),0)
+function oneunit(::Type{N}) where N <: Normed{T,f} where {T,f}
+    N(typemax(rawtype(N)) >> (8*sizeof(N)-nbitsfrac(N)-(T<:Signed)), 0)
 end
 one(::Type{T}) where {T <: Normed} = oneunit(T)
 zero(x::Normed) = zero(typeof(x))
 oneunit(x::Normed) =  one(typeof(x))
 one(x::Normed) = oneunit(x)
 rawone(v) = reinterpret(one(v))
 
-# Conversions
-function Normed{T,f}(x::Normed{T2}) where {T <: Unsigned,T2 <: Unsigned,f}
+# More construction and conversion
+function Normed{T,f}(x::Normed{T2}) where {T <: Integer,T2 <: Integer,f}
     U = Normed{T,f}
     y = round((rawone(U)/rawone(x))*reinterpret(x))
-    (0 <= y) & (y <= typemax(T)) || throw_converterror(U, x)
+    (typemin(T) <= y) & (y <= typemax(T)) || throw_converterror(U, x)
     reinterpret(U, _unsafe_trunc(T, y))
 end
 N0f16(x::N0f8) = reinterpret(N0f16, convert(UInt16, 0x0101*reinterpret(x)))
 
-(::Type{U})(x::Real) where {U <: Normed} = _convert(U, x)
+(::Type{N})(x::Real) where {N <: Normed} = _convert(N, x)
 
-function _convert(::Type{U}, x) where {T, f, U <: Normed{T,f}}
-    if T == UInt128 # for UInt128, we can't widen
+function _convert(::Type{N}, x) where {T <: Integer, f, N <: Normed{T,f}}
+    if T === UInt128 || T === Int128 # for [U]Int128, we can't widen
         # the upper limit is not exact
-        (0 <= x) & (x <= (typemax(T)/rawone(U))) || throw_converterror(U, x)
-        y = round(rawone(U)*x)
+        ((typemin(T)/rawone(N)) <= x) & (x <= (typemax(T)/rawone(N))) || throw_converterror(N, x)
+        y = round(rawone(N)*x)
     else
-        y = round(widen1(rawone(U))*x)
-        (0 <= y) & (y <= typemax(T)) || throw_converterror(U, x)
+        y = round(widen1(rawone(N))*x)
+        (typemin(T) <= y) & (y <= typemax(T)) || throw_converterror(N, x)
     end
-    reinterpret(U, _unsafe_trunc(T, y))
+    reinterpret(N, _unsafe_trunc(T, y))
 end
 # Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
-function _convert(::Type{U}, x::Float16) where {T, f, U <: Normed{T,f}}
-    if Float16(typemax(T)/rawone(U)) > Float32(typemax(T)/rawone(U))
-        x == Float16(typemax(T)/rawone(U)) && return typemax(U)
+function _convert(::Type{N}, x::Float16) where {T <: Integer, f, N <: Normed{T,f}}
+    if Float16(typemax(T)/rawone(N)) > Float32(typemax(T)/rawone(N))
+        x == Float16(typemax(T)/rawone(N)) && return typemax(N)
+        if T <: Signed
+            x == Float16(typemin(T)/rawone(N)) && return typemin(N)
+        end
     end
-    return _convert(U, Float32(x))
+    return _convert(N, Float32(x))
 end
-function _convert(::Type{U}, x::Tf) where {T, f, U <: Normed{T,f}, Tf <: Union{Float32, Float64}}
-    if T == UInt128 && f == 53
-        0 <= x <= Tf(3.777893186295717e22) || throw_converterror(U, x)
+function _convert(::Type{N}, x::Tf) where {T <: Integer, f, N <: Normed{T,f}, Tf <: Union{Float32, Float64}}
+    if T === UInt128 && f == 53
+        Tf(0) <= x <= Tf(3.777893186295717e22) || throw_converterror(N, x)
+    elseif T === Int128 && f == 53
+        Tf(-1.888946593147859e22) <= x <= Tf(1.888946593147859e22) || throw_converterror(N, x)
     else
-        0 <= x <= Tf((typemax(T)-rawone(U))/rawone(U)+1) || throw_converterror(U, x)
+        Tf((typemin(T)+rawone(N))/rawone(N)-1) <= x <= Tf((typemax(T)-rawone(N))/rawone(N)+1) || throw_converterror(N, x)
     end
 
-    significand_bits = Tf == Float64 ? 52 : 23
+    significand_bits = Tf === Float64 ? 52 : 23
     if f <= (significand_bits + 1) && sizeof(T) * 8 < significand_bits
-        return reinterpret(U, unsafe_trunc(T, round(rawone(U) * x)))
+        return reinterpret(N, unsafe_trunc(T, round(rawone(N) * x)))
     end
     # cf. the implementation of `frexp`
     Tw = f < sizeof(T) * 8 ? T : widen1(T)
     bits = sizeof(Tw) * 8 - 1
-    xu = reinterpret(Tf == Float64 ? UInt64 : UInt32, x)
+    xu = reinterpret(Tf === Float64 ? I64(T) : I32(T), x)
     k = Int(xu >> significand_bits)
-    k == 0 && return zero(U) # neglect subnormal numbers
+    k == 0 && return zero(N) # neglect subnormal numbers
     significand = xu | (one(xu) << significand_bits)
     yh = unsafe_trunc(Tw, significand) << (bits - significand_bits)
     exponent_bias = Tf == Float64 ? 1023 : 127
     ex = exponent_bias - k + bits - f
     yi = bits >= f ? yh - (yh >> f) : yh
     if ex <= 0
-        ex == 0 && return reinterpret(U, unsafe_trunc(T, yi))
-        ex != -1 || signbit(signed(yi)) && return typemax(U)
-        return reinterpret(U, unsafe_trunc(T, yi + yi))
+        ex == 0 && return reinterpret(N, unsafe_trunc(T, yi))
+        ex != -1 || signbit(signed(yi)) && return typemax(N)
+        return reinterpret(N, unsafe_trunc(T, yi + yi))
     end
-    ex > bits && return reinterpret(U, ex == bits + 1 ? one(T) : zero(T))
+    ex > bits && return reinterpret(N, ex == bits + 1 ? one(T) : zero(T))
     yi += one(Tw)<<((ex - 1) & bits) # RoundNearestTiesUp
-    return reinterpret(U, unsafe_trunc(T, yi >> (ex & bits)))
+    return reinterpret(N, unsafe_trunc(T, yi >> (ex & bits)))
 end
 
 rem(x::T, ::Type{T}) where {T <: Normed} = x
@@ -125,14 +142,18 @@ function (::Type{T})(x::Normed) where {T <: AbstractFloat}
     convert(T, y)  # needed for types like Float16 which promote arithmetic to Float32
 end
 
-function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32}, f}
+# A slightly faster copysign (one that avoids type-piracy)
+setsign(x::Float32, i::UInt32) = x
+setsign(x::Float32, i::Int32)  = reinterpret(Float32, reinterpret(UInt32, x) | (reinterpret(UInt32, i) & reinterpret(UInt32, typemin(Int32))))
+
+function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32, Int8, Int16, Int32}, f}
     f == 1 ? Float16(x.i) : Float16(Float32(x))
 end
-function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128}, f}
+function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128, Int64, Int128}, f}
     f == 1 ? Float16(x.i) : Float16(Float64(x))
 end
 
-function Base.Float32(x::Normed{UInt8,f}) where f
+function Base.Float32(x::Normed{<:Union{UInt8,Int8},f}) where f
     f == 1 && return Float32(x.i)
     f == 2 && return Float32(Int32(x.i) * 0x101) * @f32(0x550055p-32)
     f == 3 && return Float32(Int32(x.i) * 0x00b) * @f32(0xd4c77bp-30)
@@ -143,7 +164,7 @@ function Base.Float32(x::Normed{UInt8,f}) where f
     f == 8 && return Float32(Int32(x.i) * 0x155) * @f32(0xc0f0fdp-40)
     0.0f0
 end
-function Base.Float32(x::Normed{UInt16,f}) where f
+function Base.Float32(x::Normed{<:Union{UInt16,Int16},f}) where f
     f32 = Float32(x.i)
     f ==  1 && return f32
     f ==  2 && return f32 * @f32(0x55p-8)  + f32 * @f32(0x555555p-32)
@@ -155,19 +176,19 @@ function Base.Float32(x::Normed{UInt16,f}) where f
     f == 16 && return f32 * @f32(0x01p-16) + f32 * @f32(0x010001p-48)
     Float32(x.i / rawone(x))
 end
-function Base.Float32(x::Normed{UInt32,f}) where f
+function Base.Float32(x::Normed{T,f}) where {T <: Union{UInt32,Int32}, f}
     f == 1 && return Float32(x.i)
     i32 = unsafe_trunc(Int32, x.i)
     if f == 32
         rh, rl = Float32(i32>>>16), Float32((i32&0xFFFF)<<8 | (i32>>>24))
-        return muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40))
+        return setsign(muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40)), x.i)
     elseif f >= 25
         rh, rl = Float32(i32>>>16),Float32(((i32&0xFFFF)<<14) + (i32>>>(f-14)))
-        return muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f)))
+        return setsign(muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f))), x.i)
     end
     # FIXME: avoid the branch in native x86_64 (non-SIMD) codes
     m = ifelse(i32 < 0, 0x1p32 * inv_rawone(x), 0.0)
-    Float32(muladd(Float64(i32), inv_rawone(x), m))
+    return setsign(Float32(muladd(Float64(i32), inv_rawone(x), m)), x.i)
 end
 function Base.Float32(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128}, f}
     f == 1 ? Float32(x.i) : Float32(Float64(x))
@@ -176,7 +197,11 @@ end
 function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16}, f}
     Float64(Normed{UInt32,f}(x))
 end
-function Base.Float64(x::Normed{UInt32,f}) where f
+function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{Int8, Int16}, f}
+    Float64(Normed{Int32,f}(x))
+end
+
+function Base.Float64(x::Normed{<:Union{UInt32,Int32},f}) where f
     f64 = Float64(x.i)
     f ==  1 && return f64
     f ==  2 && return (f64 * 0x040001) * 0x15555000015555p-72
@@ -206,7 +231,7 @@ function Base.Float64(x::Normed{UInt32,f}) where f
     f == 32 && return (f64 * 0x010101) * 0x00ff0000ffff01p-96
     f64 / rawone(x)
 end
-function Base.Float64(x::Normed{UInt64,f}) where f
+function Base.Float64(x::Normed{UInt64,f}) where f   # TODO: optimized version for Int64
     f == 1 && return Float64(x.i)
     if f >= 53
         rh = Float64(unsafe_trunc(Int64, x.i >> 16)) * @exp2(16-f) # upper 48 bits
@@ -215,7 +240,7 @@ function Base.Float64(x::Normed{UInt64,f}) where f
     end
     x.i / rawone(x)
 end
-function Base.Float64(x::Normed{UInt128,f}) where f
+function Base.Float64(x::Normed{UInt128,f}) where f   # TODO: optimized version for Int128
     f == 1 && return Float64(x.i)
     ih, il = unsafe_trunc(Int64, x.i>>64), unsafe_trunc(Int64, x.i)
     rh = Float64(ih>>>16) * @exp2(f <= 53 ? 80 : 80 - f) # upper 48 bits
@@ -241,7 +266,8 @@ Base.Rational{Ti}(x::Normed) where {Ti <: Integer} = convert(Ti, reinterpret(x))
 Base.Rational(x::Normed) = reinterpret(x)//rawone(x)
 
 # Traits
-abs(x::Normed) = x
+abs(x::Normed{<:Unsigned}) = x
+abs(x::T) where T<:Normed  = T(abs(x.i), 0)
 
 (-)(x::T) where {T <: Normed} = T(-reinterpret(x), 0)
 (~)(x::T) where {T <: Normed} = T(~reinterpret(x), 0)
@@ -257,48 +283,38 @@ abs(x::Normed) = x
 
 # Functions
 trunc(x::T) where {T <: Normed} = T(div(reinterpret(x), rawone(T))*rawone(T),0)
-floor(x::T) where {T <: Normed} = trunc(x)
+floor(x::Normed{T}) where {T <: Unsigned} = trunc(x)
+function floor(x::Normed{T}) where {T <: Signed}
+    d, r = divrem(reinterpret(x), rawone(x))
+    return typeof(x)((d - signbit(r))*rawone(x), 0)
+end
 function round(x::Normed{T,f}) where {T,f}
-    mask = convert(T, 1<<(f-1))
-    y = trunc(x)
-    return convert(T, reinterpret(x)-reinterpret(y)) & mask>0 ?
-            Normed{T,f}(y+oneunit(Normed{T,f})) : y
+    d, r = divrem(reinterpret(x), rawone(x))
+    return Normed{T,f}((d + r>>(f-1))*rawone(x), 0)
 end
 function ceil(x::Normed{T,f}) where {T,f}
-    k = 8*sizeof(T)-f
-    mask = (typemax(T)<<k)>>k
-    y = trunc(x)
-    return convert(T, reinterpret(x)-reinterpret(y)) & (mask)>0 ?
-            Normed{T,f}(y+oneunit(Normed{T,f})) : y
+    d, r = divrem(reinterpret(x), rawone(x))
+    return Normed{T,f}((d + (r>zero(r)))*rawone(x), 0)
 end
 
 trunc(::Type{T}, x::Normed) where {T <: Integer} = convert(T, div(reinterpret(x), rawone(x)))
-round(::Type{T}, x::Normed) where {T <: Integer} = round(T, reinterpret(x)/rawone(x))
-floor(::Type{T}, x::Normed) where {T <: Integer} = trunc(T, x)
- ceil(::Type{T}, x::Normed) where {T <: Integer} =  ceil(T, reinterpret(x)/rawone(x))
+round(::Type{T}, x::Normed) where {T <: Integer} = round(T, float(x))
+floor(::Type{T}, x::Normed{<:Unsigned}) where {T <: Integer} = trunc(T, x)
+floor(::Type{T}, x::Normed{<:Signed})   where {T <: Integer} = floor(T, float(x))
+ ceil(::Type{T}, x::Normed) where {T <: Integer} =  ceil(T, float(x))
 
 isfinite(x::Normed) = true
 isnan(x::Normed) = false
 isinf(x::Normed) = false
 
-bswap(x::Normed{UInt8,f}) where {f} = x
-bswap(x::Normed)  = typeof(x)(bswap(reinterpret(x)),0)
+bswap(x::Normed{<:Union{UInt8,Int8},f}) where {f} = x
+bswap(x::Normed)  = typeof(x)(bswap(reinterpret(x)), 0)
 
 function minmax(x::T, y::T) where {T <: Normed}
     a, b = minmax(reinterpret(x), reinterpret(y))
     T(a,0), T(b,0)
 end
 
-# Iteration
-# The main subtlety here is that iterating over N0f8(0):N0f8(1) will wrap around
-# unless we iterate using a wider type
-@inline start(r::StepRange{T}) where {T <: Normed} = widen1(reinterpret(r.start))
-@inline next(r::StepRange{T}, i::Integer) where {T <: Normed} = (T(i,0), i+reinterpret(r.step))
-@inline function done(r::StepRange{T}, i::Integer) where {T <: Normed}
-    i1, i2 = reinterpret(r.start), reinterpret(r.stop)
-    isempty(r) | (i < min(i1, i2)) | (i > max(i1, i2))
-end
-
 function decompose(x::Normed)
     g = gcd(reinterpret(x), rawone(x))
     div(reinterpret(x),g), 0, div(rawone(x),g)