Skip to content

Commit

Permalink
Revert changes back to vload/vstore! instead of load/store!.
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Feb 24, 2020
1 parent ef20072 commit d6ed285
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 27 deletions.
12 changes: 6 additions & 6 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[SIMDPirates]]
deps = ["VectorizationBase"]
git-tree-sha1 = "ecacd3f808e559d9e363f2620041c6286c8efaca"
git-tree-sha1 = "4b1e0b1442fb4af5e6b93b9c7fdeacf287d2653b"
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
version = "0.4.0"
version = "0.5.0"

[[SLEEFPirates]]
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]
git-tree-sha1 = "bb99e28c0284de9c3233258a93882429752faa55"
git-tree-sha1 = "769fd039d0835e8e628d61e2f0c80822ba668497"
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
version = "0.3.8"
version = "0.3.9"

[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Expand All @@ -71,6 +71,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[VectorizationBase]]
deps = ["CpuId", "LinearAlgebra"]
git-tree-sha1 = "b9b5c8fa55e9b859989e759f405624d16b0b0ca2"
git-tree-sha1 = "9f8caaa5d033f88e188f62a3dba0dab5f429447a"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.4.2"
version = "0.5.0"
8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LoopVectorization"
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
authors = ["Chris Elrod <[email protected]>"]
version = "0.6.14"
version = "0.6.15"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -12,9 +12,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

[compat]
Parameters = "0"
SIMDPirates = "~0.4"
SLEEFPirates = "~0.3.8"
VectorizationBase = "~0.4.2"
SIMDPirates = "~0.5"
SLEEFPirates = "~0.3.9"
VectorizationBase = "~0.5"
julia = "1.1"

[extras]
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ It then tries to vectorize the loop to improve runtime performance.

The macro assumes that loop iterations can be reordered. It also currently supports simple nested loops, where loop bounds of inner loops are constant across iterations of the outer loop, and only a single loop at each level of the loop nest. These limitations should be removed in a future version.

## Benchmarks

Please see the documentation for benchmarks versus base Julia, Clang-Polly, icc, ifort, gfortran, and Eigen. If you believe any code or compiler flags can be improved, would like to submit your own benchmarks, or have Julia code using LoopVectorization that you would like to be tested for performance regressions on a semi-regular basis, please feel free to file an issue or PR with the code sample.

## Examples
### Dot Product
<details>
Expand Down
2 changes: 1 addition & 1 deletion src/add_constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ function add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPositi
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref)
add_vptr!(ls, op)
temp = gensym(:intermediateconstref)
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:vload), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
pushpreamble!(ls, op, temp)
pushop!(ls, op, temp)
end
Expand Down
8 changes: 4 additions & 4 deletions src/add_loads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ end

# `LoopValue` is a field-less marker type. The methods defined on it below
# compute their result from the index argument alone; none of them dereference
# a pointer.
struct LoopValue end
# Asking for the strided pointer of a `LoopValue` simply yields a `LoopValue`.
@inline VectorizationBase.stridedpointer(::LoopValue) = LoopValue()
# `load` spelling of the index-to-value methods. Per the commit title
# ("Revert changes back to vload/vstore! instead of load/store!") these appear
# to be the pre-change lines of this hunk, with the `vload` lines that follow
# being their replacements.
# Each method returns the supplied index plus one.
# NOTE(review): presumably this converts a zero-based offset into a one-based
# loop index; confirm against the callers.
@inline VectorizationBase.load(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# The unnamed `::Unsigned` argument is accepted but ignored (presumably a mask; confirm).
@inline VectorizationBase.load(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
@inline VectorizationBase.load(::LoopValue, i::Integer) = i + one(i)
@inline VectorizationBase.load(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
# `vload` spelling of the same four methods; the bodies are identical to the
# `load` versions above.
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
# The unnamed `::Unsigned` argument is accepted but ignored (presumably a mask; confirm).
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
@inline VectorizationBase.vload(::LoopValue, i::Integer) = i + one(i)
@inline VectorizationBase.vload(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
# `eltype` of a `LoopValue` is reported as `Int8`.
@inline Base.eltype(::LoopValue) = Int8

4 changes: 2 additions & 2 deletions src/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ function vfilter!(f::F, x::Vector{T}, y::AbstractArray{T}) where {F,T <: SUPPORT
ptr_x = pointer(x)
ptr_y = pointer(y)
for _ 1:Nrep
vy = load(Vec{W,T}, ptr_y, i)
vy = vload(Vec{W,T}, ptr_y, i)
mask = f(SVec(vy))
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
i += W
j += count_ones(mask)
end
rem_mask = VectorizationBase.mask(T, Nrem)
vy = load(Vec{W,T}, gep(ptr_y, i), rem_mask)
vy = vload(Vec{W,T}, gep(ptr_y, i), rem_mask)
mask = rem_mask & f(SVec(vy))
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
j += count_ones(mask)
Expand Down
4 changes: 2 additions & 2 deletions src/lower_load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ function pushvectorload!(q::Expr, op::Operation, var::Symbol, td::UnrollArgs, U:
@unpack u, unrolled = td
ptr = refname(op)
name, mo = name_memoffset(var, op, td, W, vecnotunrolled)
instrcall = Expr(:call, lv(:load), ptr, mo)
instrcall = Expr(:call, lv(:vload), ptr, mo)
if mask !== nothing && (vecnotunrolled || u == U - 1)
push!(instrcall.args, mask)
end
Expand All @@ -21,7 +21,7 @@ function lower_load_scalar!(
for u 0:U-1
varname = varassignname(var, u, isunrolled)
td = UnrollArgs(u, unrolled, tiled, suffix)
push!(q.args, Expr(:(=), varname, Expr(:call, lv(:load), ptr, mem_offset_u(op, td))))
push!(q.args, Expr(:(=), varname, Expr(:call, lv(:vload), ptr, mem_offset_u(op, td))))
end
nothing
end
Expand Down
8 changes: 4 additions & 4 deletions src/lower_store.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function lower_conditionalstore_scalar!(
varname = varassignname(var, u, parentisunrolled)
condvarname = varassignname(condvar, u, condunrolled)
td = UnrollArgs(u, unrolled, tiled, suffix)
push!(q.args, Expr(:&&, condvarname, Expr(:call, lv(:store!), ptr, varname, mem_offset_u(op, td))))
push!(q.args, Expr(:&&, condvarname, Expr(:call, lv(:vstore!), ptr, varname, mem_offset_u(op, td))))
end
nothing
end
Expand Down Expand Up @@ -103,7 +103,7 @@ function lower_conditionalstore_vectorized!(
td = UnrollArgs(u, unrolled, tiled, suffix)
name, mo = name_memoffset(var, op, td, W, vecnotunrolled, parentisunrolled)
condvarname = varassignname(condvar, u, condunrolled)
instrcall = Expr(:call, lv(:store!), ptr, name, mo)
instrcall = Expr(:call, lv(:vstore!), ptr, name, mo)
if mask !== nothing && (vecnotunrolled || u == U - 1)
push!(instrcall.args, Expr(:call, lv(:combinemasks), condvarname, mask))
else
Expand All @@ -123,7 +123,7 @@ function lower_store_scalar!(
for u 0:U-1
varname = varassignname(var, u, parentisunrolled)
td = UnrollArgs(u, unrolled, tiled, suffix)
push!(q.args, Expr(:call, lv(:store!), ptr, varname, mem_offset_u(op, td)))
push!(q.args, Expr(:call, lv(:vstore!), ptr, varname, mem_offset_u(op, td)))
end
nothing
end
Expand All @@ -147,7 +147,7 @@ function lower_store_vectorized!(
for u 0:U-1
td = UnrollArgs(u, unrolled, tiled, suffix)
name, mo = name_memoffset(var, op, td, W, vecnotunrolled, parentisunrolled)
instrcall = Expr(:call, lv(:store!), ptr, name, mo)
instrcall = Expr(:call, lv(:vstore!), ptr, name, mo)
if mask !== nothing && (vecnotunrolled || u == U - 1)
push!(instrcall.args, mask)
end
Expand Down
8 changes: 4 additions & 4 deletions src/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ function vmap_quote(N, ::Type{T}) where {T}
val = Expr(:call, Expr(:curly, :Val, W))
q = Expr(:block, Expr(:(=), :M, Expr(:call, :length, :dest)), Expr(:(=), :vdest, Expr(:call, :pointer, :dest)), Expr(:(=), :m, 0))
fcall = Expr(:call, :f)
loopbody = Expr(:block, Expr(:call, :store!, :vdest, fcall, :m), Expr(:(+=), :m, W))
loopbody = Expr(:block, Expr(:call, :vstore!, :vdest, fcall, :m), Expr(:(+=), :m, W))
fcallmask = Expr(:call, :f)
bodymask = Expr(:block, Expr(:(=), :__mask__, Expr(:call, :mask, val, Expr(:call, :&, :M, W-1))), Expr(:call, :store!, :vdest, fcallmask, :m, :__mask__))
bodymask = Expr(:block, Expr(:(=), :__mask__, Expr(:call, :mask, val, Expr(:call, :&, :M, W-1))), Expr(:call, :vstore!, :vdest, fcallmask, :m, :__mask__))
for n 1:N
arg_n = Symbol(:varg_,n)
push!(q.args, Expr(:(=), arg_n, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:call, :pointer, Expr(:ref, :args, n)))))
push!(fcall.args, Expr(:call, :load, val, arg_n, :m))
push!(fcallmask.args, Expr(:call, :load, val, arg_n, :m, :__mask__))
push!(fcall.args, Expr(:call, :vload, val, arg_n, :m))
push!(fcallmask.args, Expr(:call, :vload, val, arg_n, :m, :__mask__))
end
loop = Expr(:for, Expr(:(=), :_, Expr(:call, :(:), 0, Expr(:call, :-, Expr(:call, :(>>>), :M, Wshift), 1))), loopbody)
push!(q.args, loop)
Expand Down

2 comments on commit d6ed285

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/10004

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.15 -m "<description of version>" d6ed285a4a8eba9caa672deeb444d729704efaec
git push origin v0.6.15

Please sign in to comment.