Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for i in iter #65

Merged
merged 3 commits into from
Mar 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[OffsetArrays]]
git-tree-sha1 = "707e34562700b81e8aa13548eb6b23b18112e49b"
uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
version = "1.0.2"

[[OrderedCollections]]
deps = ["Random", "Serialization", "Test"]
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
Expand All @@ -49,9 +54,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[SIMDPirates]]
deps = ["VectorizationBase"]
git-tree-sha1 = "34dff4f4715f871e71b38f31397d96e62621f14d"
git-tree-sha1 = "f91198b7ef74b04028f98e0eed7c556b93538a2e"
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
version = "0.6.5"
version = "0.6.6"

[[SLEEFPirates]]
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]
Expand All @@ -71,6 +76,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[VectorizationBase]]
deps = ["CpuId", "LinearAlgebra"]
git-tree-sha1 = "006d7b7f276db8d728f8bfd70ebf2efd132f9548"
git-tree-sha1 = "8abb5697fb64cadccd1bba444c955942d3181e5c"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.7.0"
version = "0.7.1"
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.6.20"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
SIMDPirates = "21efa798-c60a-11e8-04d3-e1a92915a26a"
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
Expand Down
4 changes: 3 additions & 1 deletion src/LoopVectorization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ using VectorizationBase, SIMDPirates, SLEEFPirates, Parameters
using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector_load_expr,
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM,
maybestaticlength, maybestaticsize, staticm1, subsetview, vzero, stridedpointer_for_broadcast,
Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange,
Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct
using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod,
sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, #prefetch,
vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone
using Base.Broadcast: Broadcasted, DefaultArrayStyle
using LinearAlgebra: Adjoint, Transpose
using Base.Meta: isexpr

const SUPPORTED_TYPES = Union{Float16,Float32,Float64,Integer}

Expand All @@ -21,6 +22,7 @@ export LowDimArray, stridedpointer, vectorizable,
vfilter, vfilter!


include("vectorizationbase_extensions.jl")
include("map.jl")
include("filter.jl")
include("costs.jl")
Expand Down
10 changes: 0 additions & 10 deletions src/add_loads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,3 @@ function add_loopvalue!(ls::LoopSet, arg::Symbol, elementbytes::Int)
loopsymop
end


struct LoopValue end
@inline VectorizationBase.stridedpointer(::LoopValue) = LoopValue()
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# @inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Mask) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
@inline VectorizationBase.vload(::LoopValue, i::Integer) = i + one(i)
@inline VectorizationBase.vload(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
@inline Base.eltype(::LoopValue) = Int8

96 changes: 52 additions & 44 deletions src/graphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# For passing options like array types and mask
# struct LoopSetOptions

# end

struct Loop
Expand Down Expand Up @@ -70,7 +70,7 @@ function startloop(loop::Loop, isvectorized, W, itersymbol = loop.itersymbol)
elseif startexact
Expr(:(=), itersymbol, loop.starthint)
else
Expr(:(=), itersymbol, loop.startsym)
Expr(:(=), itersymbol, Expr(:call, lv(:unwrap), loop.startsym))
end
end
function vec_looprange(loop::Loop, isunrolled::Bool, W::Symbol, U::Int)
Expand All @@ -84,7 +84,7 @@ function vec_looprange(loop::Loop, isunrolled::Bool, W::Symbol, U::Int)
else
Expr(:call, :<, loop.itersymbol, Expr(:call, :-, loop.stopsym, incr))
end
end
end
function looprange(loop::Loop, incr::Int, mangledname::Symbol)
incr -= 1#one(Int32)
if iszero(incr)
Expand Down Expand Up @@ -369,47 +369,59 @@ This function creates a loop, while switching from 1 to 0 based indices
"""
function register_single_loop!(ls::LoopSet, looprange::Expr)
itersym = (looprange.args[1])::Symbol
r = (looprange.args[2])::Expr
@assert r.head === :call
f = first(r.args)
loop::Loop = if f === :(:)
lower = r.args[2]
upper = r.args[3]
lii::Bool = lower isa Integer
liiv::Int = lii ? (convert(Int, lower)-1) : 0
uii::Bool = upper isa Integer
if lii & uii # both are integers
Loop(itersym, liiv, convert(Int, upper))
elseif lii # only lower bound is an integer
if upper isa Symbol
Loop(itersym, liiv, upper)
elseif upper isa Expr
Loop(itersym, liiv, add_loop_bound!(ls, itersym, upper, true))
else
Loop(itersym, liiv, add_loop_bound!(ls, itersym, upper, true))
r = looprange.args[2]
if isexpr(r, :call)
f = first(r.args)
loop::Loop = if f === :(:)
lower = r.args[2]
upper = r.args[3]
lii::Bool = lower isa Integer
liiv::Int = lii ? (convert(Int, lower)-1) : 0
uii::Bool = upper isa Integer
if lii & uii # both are integers
Loop(itersym, liiv, convert(Int, upper))
elseif lii # only lower bound is an integer
if upper isa Symbol
Loop(itersym, liiv, upper)
elseif upper isa Expr
Loop(itersym, liiv, add_loop_bound!(ls, itersym, upper, true))
else
Loop(itersym, liiv, add_loop_bound!(ls, itersym, upper, true))
end
elseif uii # only upper bound is an integer
uiiv = convert(Int, upper)
Loop(itersym, add_loop_bound!(ls, itersym, lower, false), uiiv)
else # neither are integers
L = add_loop_bound!(ls, itersym, lower, false)
U = add_loop_bound!(ls, itersym, upper, true)
Loop(itersym, L, U)
end
elseif uii # only upper bound is an integer
uiiv = convert(Int, upper)
Loop(itersym, add_loop_bound!(ls, itersym, lower, false), uiiv)
else # neither are integers
L = add_loop_bound!(ls, itersym, lower, false)
U = add_loop_bound!(ls, itersym, upper, true)
elseif f === :eachindex
N = gensym(Symbol(:loopeachindex, itersym))
pushpreamble!(ls, Expr(:(=), N, Expr(:call, lv(:maybestaticrange), r)))
L = add_loop_bound!(ls, itersym, Expr(:call, :first, N), false)
U = add_loop_bound!(ls, itersym, Expr(:call, :last, N), true)
Loop(itersym, L, U)
end
elseif f === :eachindex
N = gensym(Symbol(:loop, itersym))
pushpreamble!(ls, Expr(:(=), N, Expr(:call, lv(:maybestaticlength), r.args[2])))
Loop(itersym, 0, N)
elseif f === :OneTo || f == Expr(:(.), :Base, QuoteNode(:OneTo))
otN = r.args[2]
if otN isa Integer
Loop(itersym, 0, otN)
elseif f === :OneTo || f == Expr(:(.), :Base, QuoteNode(:OneTo))
otN = r.args[2]
if otN isa Integer
Loop(itersym, 0, otN)
else
otN isa Expr && maybestatic!(otN)
N = gensym(Symbol(:loop, itersym))
pushpreamble!(ls, Expr(:(=), N, otN))
Loop(itersym, 0, N)
end
else
otN isa Expr && maybestatic!(otN)
N = gensym(Symbol(:loop, itersym))
pushpreamble!(ls, Expr(:(=), N, otN))
Loop(itersym, 0, N)
throw("Unrecognized loop range type: $r.")
end
elseif isa(r, Symbol)
# Treat similar to `eachindex`
N = gensym(Symbol(:loop, itersym))
pushpreamble!(ls, Expr(:(=), N, Expr(:call, lv(:maybestaticrange), r)))
L = add_loop_bound!(ls, itersym, Expr(:call, :first, N), false)
U = add_loop_bound!(ls, itersym, Expr(:call, :last, N), true)
loop = Loop(itersym, L, U)
else
throw("Unrecognized loop range type: $r.")
end
Expand Down Expand Up @@ -546,7 +558,3 @@ function Base.push!(ls::LoopSet, ex::Expr, elementbytes::Int, position::Int)
throw("Don't know how to handle expression:\n$ex")
end
end




13 changes: 12 additions & 1 deletion src/reconstruct_loopset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ function Loop(ls::LoopSet, l::Int, ::Type{StaticLowerUnitRange{L}}) where {L}
pushpreamble!(ls, Expr(:(=), stop, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:(.), Expr(:ref, :lb, l), QuoteNode(:U)))))
Loop(gensym(:n), L, L + 1024, Symbol(""), stop, true, false)::Loop
end
# Is there any likely way to generate such a range?
# function Loop(ls::LoopSet, l::Int, ::Type{StaticLengthUnitRange{N}}) where {N}
# start = gensym(:loopstart); stop = gensym(:loopstop)
# pushpreamble!(ls, Expr(:(=), start, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:(.), Expr(:ref, :lb, l), QuoteNode(:L)))))
# pushpreamble!(ls, Expr(:(=), stop, Expr(:call, :(+), start, N - 1)))
# Loop(gensym(:n), 0, N, start, stop, false, false)::Loop
# end
# Build a `Loop` for a range whose lower and upper bounds are both static type
# parameters. `L` and `U` are passed through as literal values, both symbol arguments
# are the empty symbol, and both trailing flags are `true`. No preamble expression is
# pushed, so `ls` and `l` are unused by this method.
# NOTE(review): presumably the two `true` flags mark the start and stop as exactly
# known, which is why no runtime bound symbols are needed — confirm against `struct Loop`.
function Loop(ls, l, ::Type{StaticUnitRange{L,U}}) where {L,U}
Loop(gensym(:n), L, U, Symbol(""), Symbol(""), true, true)::Loop
end
Expand Down Expand Up @@ -63,14 +70,18 @@ extract_varg(i) = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__
# Add a preamble assignment that binds `vptr(ar)` to the expression produced by
# `extract_varg(i)`.
function pushvarg!(ls::LoopSet, ar::ArrayReferenceMeta, i)
    assignment = Expr(:(=), vptr(ar), extract_varg(i))
    return pushpreamble!(ls, assignment)
end
function pushvarg′!(ls::LoopSet, ar::ArrayReferenceMeta, i)
reverse!(ar.loopedindex); reverse!(getindices(ar)) # reverse the listed indices here, and transpose it to make it column major
pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:call, lv(:Transpose), extract_varg(i))))
pushpreamble!(ls, Expr(:(=), vptr(ar), Expr(:call, lv(:transpose), extract_varg(i))))
end
# Packed strided pointers need no index reordering: register the pointer as-is.
add_mref!(
    ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{PackedStridedPointer{T, N}}
) where {T, N} = pushvarg!(ls, ar, i)
# Row-major strided pointers go through the primed variant of `pushvarg!`.
add_mref!(
    ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{RowMajorStridedPointer{T, N}}
) where {T, N} = pushvarg′!(ls, ar, i)
# An `OffsetStridedPointer` is handled according to the pointer type `P` it wraps:
# re-dispatch on `P`.
add_mref!(
    ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{OffsetStridedPointer{T,N,P}}
) where {T,N,P} = add_mref!(ls, ar, i, P)

function add_mref!(
ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{S}
) where {T, X <: Tuple, S <: VectorizationBase.AbstractStaticStridedPointer{T,X}}
Expand Down
38 changes: 38 additions & 0 deletions src/vectorizationbase_extensions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# `LoopValue` is an empty marker type. The methods below extend
# `VectorizationBase.stridedpointer` and `VectorizationBase.vload` so that "loading"
# through a `LoopValue` returns the supplied index plus one instead of reading memory.
# NOTE(review): the `+ 1` presumably maps an internal 0-based loop counter back to the
# 1-based value the user's loop variable would hold — confirm against the call sites.
struct LoopValue end
# A `LoopValue` serves as its own strided pointer.
@inline VectorizationBase.stridedpointer(::LoopValue) = LoopValue()
# Vector index: read the `i` field of the `_MM{W}` index, add one, rewrap as `_MM{W}`.
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# @inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# Masked variant: the mask argument is accepted but not used in the result.
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Mask) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# Scalar index, passed either bare or wrapped in a 1-tuple; the result keeps the
# integer type of the index because the increment is `one` of that type.
@inline VectorizationBase.vload(::LoopValue, i::Integer) = i + one(i)
@inline VectorizationBase.vload(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
# The reported element type is `Int8`.
# NOTE(review): presumably chosen as the smallest integer type so it does not influence
# element-size decisions made elsewhere — confirm.
@inline Base.eltype(::LoopValue) = Int8

import OffsetArrays

# One-dimensional `OffsetArray`s need no wrapper type: take the parent's strided
# pointer and shift it by the negated first-axis offset via `gesp`.
@inline function VectorizationBase.stridedpointer(a::OffsetArrays.OffsetArray{<:Any,1})
    parentptr = stridedpointer(parent(a))
    shift = -@inbounds(a.offsets[1])
    return gesp(parentptr, (shift,))
end

# Strided pointer wrapper that pairs an underlying pointer with per-dimension offsets.
#   T - element type, shared with the wrapped pointer type `P`
#   N - number of stored offsets
#   P - concrete type of the wrapped strided pointer
struct OffsetStridedPointer{T, N, P <: VectorizationBase.AbstractStridedPointer{T}} <: VectorizationBase.AbstractStridedPointer{T}
# The wrapped strided pointer.
ptr::P
# One `Int` offset per dimension.
offsets::NTuple{N,Int}
end
# For an OffsetArray with two or more dimensions, `eachindex(A)` is a `Base.OneTo`
# (linear indices start at 1), yet multi-index access is computed using the offsets.
# A dedicated pointer type carries those offsets alongside the parent's pointer.
@inline VectorizationBase.stridedpointer(A::OffsetArrays.OffsetArray) =
    OffsetStridedPointer(stridedpointer(parent(A)), A.offsets)
# Earlier variants, kept for reference: they forwarded to `offset` on the wrapped
# pointer, and the multi-index form *added* the stored offsets.
# @inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = VectorizationBase.offset(ptr.ptr, ind)
# @inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = VectorizationBase.offset(ptr.ptr, ntuple(n -> ind[n] + ptr.offsets[n], Val{N}()))
# Single index (tuple of length 1): the index tuple is returned unchanged.
@inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = ind
# Any other index tuple: subtract the stored per-dimension offset from each of the
# first `N` indices and return the adjusted tuple.
# NOTE(review): both active methods return index tuples rather than forwarding to
# `offset` on `ptr.ptr` — presumably a later step applies the wrapped pointer; confirm.
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = ntuple(n -> ind[n] - ptr.offsets[n], Val{N}())
# Rebuild the wrapper around `similar(p.ptr, ptr)`, keeping the same offsets.
@inline Base.similar(p::OffsetStridedPointer, ptr::Ptr) = OffsetStridedPointer(similar(p.ptr, ptr), p.offsets)

# When an OffsetArray is indexed by a (loop-)constant value, that particular vptr
# cannot also be iterated with `eachindex`, so it is safe to hand back an ordinary
# strided pointer: shift the wrapped pointer by the negated offsets, then take the
# subset view of the result.
@inline function VectorizationBase.subsetview(ptr::OffsetStridedPointer{<:Any,N}, ::Val{I}, i) where {I,N}
    negated = ntuple(Val{N}()) do n
        0 - @inbounds(ptr.offsets[n])
    end
    shifted = gesp(ptr.ptr, negated)
    return subsetview(shifted, Val{I}(), i)
end

19 changes: 17 additions & 2 deletions test/dot.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
using LoopVectorization, OffsetArrays
using Test

@testset "dot" begin
dotq = :(for i ∈ eachindex(a,b)
s += a[i]*b[i]
Expand Down Expand Up @@ -46,6 +49,14 @@
end
s
end
# Sum of squares of `a`, computed with `@avx`. The loop iterates over `rng`, a plain
# variable holding `axes(a, 1)`, so the loop header names a range variable instead of
# spelling out `eachindex(...)` or `lo:hi` directly.
function myselfdotavx_range(a)
# Accumulator starts at zero of the element type.
s = zero(eltype(a))
rng = axes(a, 1)
@avx for i ∈ rng
s += a[i]*a[i]
end
s
end
function myselfdot_avx(a)
s = zero(eltype(a))
@_avx for i ∈ eachindex(a)
Expand Down Expand Up @@ -167,7 +178,7 @@
end
4acc/length(x)
end

# @macroexpand @_avx for i = 1:length(a_re) - 1
# c_re[i] = b_re[i] * a_re[i + 1] - b_im[i] * a_im[i + 1]
# c_im[i] = b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1]
Expand All @@ -179,9 +190,12 @@
N = 127
R = T <: Integer ? (T(-100):T(100)) : T
a = rand(T, N); b = rand(R, N);
ao = OffsetArray(a, -60:66); bo = OffsetArray(b, -60:66);
s = mydot(a, b)
@test mydotavx(a,b) ≈ s
@test mydot_avx(a,b) ≈ s
@test mydotavx(ao,bo) ≈ s
@test mydot_avx(ao,bo) ≈ s
@test dot_unroll2avx(a,b) ≈ s
@test dot_unroll3avx(a,b) ≈ s
@test dot_unroll2_avx(a,b) ≈ s
Expand All @@ -190,6 +204,7 @@
@test dot_unroll3avx_inline(a,b) ≈ s
s = myselfdot(a)
@test myselfdotavx(a) ≈ s
@test myselfdotavx_range(a) ≈ s
@test myselfdot_avx(a) ≈ s
@test myselfdotavx(a) ≈ s

Expand All @@ -205,7 +220,7 @@
b_re = rand(R, N); b_im = rand(R, N);
ac = Complex.(a_re, a_im);
bc = Complex.(b_re, b_im);

@test mydot(ac, bc) ≈ complex_dot_soa(a_re, a_im, b_re, b_im)

c_re1 = similar(a_re); c_im1 = similar(a_im);
Expand Down
16 changes: 16 additions & 0 deletions test/gemv.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
using LoopVectorization
using Test

@testset "GEMV" begin
gemvq = :(for i ∈ eachindex(y)
yᵢ = 0.0
Expand Down Expand Up @@ -27,6 +30,16 @@
y[i] = yᵢ
end
end
# Matrix-vector product written into `y` (`y[i] = sum over j of A[i,j] * x[j]`),
# computed with `@avx`. Both loops iterate over range variables (`rng1`, `rng2`)
# obtained from `axes(A)` rather than over ranges written in the loop headers.
function mygemvavx_range!(y, A, x)
rng1, rng2 = axes(A)
@avx for i ∈ rng1
# Per-row accumulator, zero of `y`'s element type.
yᵢ = zero(eltype(y))
for j ∈ rng2
yᵢ += A[i,j] * x[j]
end
y[i] = yᵢ
end
end
q = :(for i ∈ eachindex(y)
yᵢ = zero(eltype(y))
for j ∈ eachindex(x)
Expand Down Expand Up @@ -150,6 +163,9 @@
@test y1 ≈ y2
fill!(y2, -999.9); mygemv_avx!(y2, A, x)
@test y1 ≈ y2
fill!(y2, -999.9)
mygemvavx_range!(y2, A, x)
@test y1 ≈ y2

B = rand(R, N, N);
G1 = Matrix{TC}(undef, N, 1);
Expand Down
Loading