Skip to content

Commit

Permalink
Fix inbounds codegen for CPU (#431)
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy authored Nov 6, 2023
1 parent edd5371 commit 32bf2bc
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,6 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
def = splitdef(expr)
name = def[:name]
args = def[:args]
if force_inbounds
body_qt = quote
@inbounds $(def[:body])
end
def[:body] = body_qt
end

find_return(expr) && error("Return statement not permitted in a kernel function $name")

constargs = Array{Bool}(undef, length(args))
Expand All @@ -45,13 +38,13 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
if generate_cpu
def_cpu = deepcopy(def)
def_cpu[:name] = cpu_name
transform_cpu!(def_cpu, constargs)
transform_cpu!(def_cpu, constargs, force_inbounds)
cpu_function = combinedef(def_cpu)
end

def_gpu = deepcopy(def)
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
transform_gpu!(def_gpu, constargs)
transform_gpu!(def_gpu, constargs, force_inbounds)
gpu_function = combinedef(def_gpu)

# create constructor functions
Expand Down Expand Up @@ -83,17 +76,23 @@ end

# The easy case, transform the function for GPU execution
# - mark constant arguments by applying `constify`.
function transform_gpu!(def, constargs)
function transform_gpu!(def, constargs, force_inbounds)
let_constargs = Expr[]
for (i, arg) in enumerate(def[:args])
if constargs[i]
push!(let_constargs, :($arg = $constify($arg)))
end
end
pushfirst!(def[:args], :__ctx__)
body = def[:body]
if force_inbounds
body = quote
@inbounds $(body)
end
end
body = quote
if $__validindex(__ctx__)
$(def[:body])
$(body)
end
return nothing
end
Expand All @@ -110,7 +109,7 @@ end
# - handle indices
# - hoist workgroup definitions
# - hoist uniform variables
function transform_cpu!(def, constargs)
function transform_cpu!(def, constargs, force_inbounds)
let_constargs = Expr[]
for (i, arg) in enumerate(def[:args])
if constargs[i]
Expand All @@ -121,7 +120,13 @@ function transform_cpu!(def, constargs)
new_stmts = Expr[]
body = MacroTools.flatten(def[:body])
push!(new_stmts, Expr(:aliasscope))
if force_inbounds
push!(new_stmts, Expr(:inbounds, true))
end
append!(new_stmts, split(body.args))
if force_inbounds
push!(new_stmts, Expr(:inbounds, :pop))
end
push!(new_stmts, Expr(:popaliasscope))
push!(new_stmts, :(return nothing))
def[:body] = Expr(:let,
Expand Down

0 comments on commit 32bf2bc

Please sign in to comment.