From 1ed7530adaa9f2372966f5116903b189705975ec Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Fri, 20 Oct 2017 01:39:30 +0200 Subject: [PATCH 01/12] only replace inbuild and empty --- src/kernel.jl | 194 ++++++++++++++------------------------------------ 1 file changed, 53 insertions(+), 141 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index f4fc13c2..8125d8ef 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -1,5 +1,4 @@ # OpenCL.Kernel - type Kernel <: CLObject id :: CL_kernel @@ -47,7 +46,7 @@ immutable LocalMem{T} nbytes::Csize_t end -LocalMem{T}(::Type{T}, len::Integer) = begin +function LocalMem{T}(::Type{T}, len::Integer) @assert len > 0 nbytes = sizeof(T) * len return LocalMem{T}(convert(Csize_t, nbytes)) @@ -84,168 +83,81 @@ function set_arg!(k::Kernel, idx::Integer, arg::LocalMem) return k end +function _contains_different_layout(::Type{T}) where T + sizeof(T) == 0 && return true + nfields(T) == 0 && return false + for fname in fieldnames(T) + contains_different_layout(fieldtype(T, fname)) && return true + end + return false +end -is_cl_vector{T}(x::T) = _is_cl_vector(T) -is_cl_vector{T}(x::Type{T}) = _is_cl_vector(T) -_is_cl_vector(x) = false -_is_cl_vector{N, T}(x::Type{NTuple{N, T}}) = is_cl_number(T) && N in (2, 3, 4, 8, 16) -is_cl_number{T}(x::Type{T}) = _is_cl_number(T) -is_cl_number{T}(x::T) = _is_cl_number(T) -_is_cl_number(x) = false -function _is_cl_number{T <: Union{ - Int64, Int32, Int16, Int8, - UInt64, UInt32, UInt16, UInt8, - Float64, Float32, Float16 - }}(::Type{T}) +function contains_different_layout(::Type{NTuple{3, T}}) where T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64} true end -is_cl_inbuild{T}(x::T) = is_cl_vector(x) || is_cl_number(x) - -immutable Pad{N} - val::NTuple{N, Int8} - (::Type{Pad{N}}){N}() = new{N}(ntuple(i-> Int8(0), Val(N))) +@generated function contains_different_layout(::Type{T}) where T + :($(_contains_different_layout(T))) end -Base.isempty{N}(::Type{Pad{N}}) = (N == 0) -Base.isempty{N}(::Pad{N}) = N == 0 - - -""" -OpenCL 1.2 Specs: -6.1.5 Alignment of Types -A data item declared to be a data type in memory is always aligned to the size of the data type in -bytes. For example, a float4 variable will be aligned to a 16-byte boundary, a char2 variable will -be aligned to a 2-byte boundary. -For 3-component vector data types, the size of the data type is 4 * sizeof(component). This -means that a 3-component vector data type will be aligned to a 4 * sizeof(component) -boundary. The vload3 and vstore3 built-in functions can be used to read and write, respectively, -3-component vector data types from an array of packed scalar data type. -A built-in data type that is not a power of two bytes in size must be aligned to the next larger -power of two. This rule applies to built-in types only, not structs or unions. -The OpenCL compiler is responsible for aligning data items to the appropriate alignment as -required by the data type. For arguments to a `__kernel` function declared to be a pointer to a -data type, the OpenCL compiler can assume that the pointee is always appropriately aligned as -required by the data type. The behavior of an unaligned load or store is undefined, except for the -vloadn, vload_halfn, vstoren, and vstore_halfn functions defined in section 6.12.7. The vector -load functions can read a vector from an address aligned to the element type of the vector. The -vector store functions can write a vector to an address aligned to the element type of the vector. -""" -cl_alignement(x) = cl_packed_sizeof(x) - -function advance_aligned(offset, alignment) - (offset == 0 || alignment == 0) && return 0 - if offset % alignment != 0 - npad = ((div(offset, alignment) + 1) * alignment) - offset - offset += npad + +function struct2tuple(x::T) where T + ntuple(Val{nfields(T)}) do i + getfield(x, i) end - offset end - -""" -Sizeof that considers OpenCL alignement. See cl_alignement -""" -function _cl_packed_sizeof{T}(::Type{T}) - tsz = sizeof(T) - tsz == 0 && fieldcount(T) == 0 && return 4 # 0 sized types can't be defined - size = if is_cl_inbuild(T) || fieldcount(T) == 0 - if is_cl_inbuild(T) - # inbuild sizes are all power of two! - return ispow2(tsz) ? tsz : nextpow2(tsz) - else - return tsz - end +function replace_different_layout(x::T) where T + !contains_different_layout(T) && return x + if nfields(x) == 0 + replace_different_layout((), (x,)) + elseif T <: Tuple + replace_different_layout((), x) else - size = 0 - for field in fieldnames(T) - size += _cl_packed_sizeof(fieldtype(T, field)) - end - return size + replace_different_layout((), struct2tuple(x)) end end -cl_packed_sizeof{T}(x::T) = cl_packed_sizeof(T) -Base.@generated function cl_packed_sizeof{T}(x::Type{T}) - :($(_cl_packed_sizeof(T))) -end -get_typ{T}(::Type{Type{T}}) = T -""" -Converts a Julia type to conform to a `__packed__` struct in OpenCL. -If a type gets passed, it will return the converted type. -This conforms to the OpenCL 1.2 specs, section 6.11.1: -``` - __packed__ - This attribute, attached to struct or union type definition, specifies that each - member of the structure or union is placed to minimize the memory required. When - attached to an enum definition, it indicates that the smallest integral type should be used. - Specifying this attribute for struct and union types is equivalent to specifying - the packed attribute on each of the structure or union members. - In the following example struct my_packed_struct's members are - packed closely together, but the internal layout of its s member is not packed. To - do that, struct my_unpacked_struct would need to be packed, too. - struct my_unpacked_struct - { - char c; - int i; - }; - - struct __attribute__ ((packed)) my_packed_struct - { - char c; - int i; - struct my_unpacked_struct s; - }; - - You may only specify this attribute on the definition of a enum, struct or - union, not on a typedef which does not also define the enumerated type, - structure or union. -``` -""" -@generated function packed_convert{TX}(x::TX) - elements = []; fields = [] - T = x <: Type ? get_typ(x) : x - _packed_convert!(T, elements, fields, :x) - TC = Tuple{last.(elements)...} - sizeof(TC) == sizeof(T) && return :(x) # no conversion happened - if x <: Type # if is not a datatype - :($TC) +replace_different_layout(red::NTuple{N, Any}, rest::Tuple{}) where N = red +function replace_different_layout(red::NTuple{N, Any}, rest) where N + elem1 = first(rest) + T = typeof(elem1) + repl = if sizeof(T) == 0 && nfields(T) == 0 + Int32(0) + elseif contains_different_layout(T) + replace_different_layout(elem1) else - tupl = Expr(:tuple) - tupl.args = first.(elements) - # hoist field loads - :($(fields...); $tupl) + elem1 end + replace_different_layout((red..., repl), Base.tail(rest)) end -function _packed_convert!(x, elements = [], fields = [], fieldname = gensym(:field)) - if !is_cl_inbuild(x) && fieldcount(x) > 0 - for field in fieldnames(x) - current_field = gensym(string(field)) - push!(fields, :($current_field = getfield($fieldname, $(QuoteNode(field))))) - xelem = fieldtype(x, field) - _packed_convert!(xelem, elements, fields, current_field) - end - else - push!(elements, fieldname => x) - if cl_packed_sizeof(x) > sizeof(x) # if size doesn't match, we need pads - npad = cl_packed_sizeof(x) - sizeof(x) - @assert npad > 0 # this shouldn't happen and would be a bug in cl_packed_sizeof! - push!(elements, :(Pad{$npad}()) => Pad{npad}) - end +# TODO UInt16/Float16? +# Handle different sizes of OpenCL Vec3, which doesn't agree with julia +function replace_different_layout(arg::NTuple{3, T}) where T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64} + pad = T(0) + (arg..., pad) +end + +function to_cl_ref(arg::T) where T + if !Base.datatype_pointerfree(T) + error("Types should not contain pointers: $T") + end + if contains_different_layout(T) + x = replace_different_layout(arg) + return Base.RefValue(x), sizeof(x) end - return elements, fields, fieldname + Base.RefValue(arg), sizeof(arg) end function set_arg!{T}(k::Kernel, idx::Integer, arg::T) @assert idx > 0 "Kernel idx must be bigger 0" - if !Base.datatype_pointerfree(T) - error("Types should not contain pointers: $T") + ref, tsize = to_cl_ref(arg) + err = api.clSetKernelArg(k.id, cl_uint(idx - 1), tsize, ref) + if err == CL_INVALID_ARG_SIZE + error("Julia and OpenCL type don't match at kernel argument $idx: Found $T") end - packed = packed_convert(arg) - T_aligned = typeof(packed) - ref = Base.RefValue(packed) - @check api.clSetKernelArg(k.id, cl_uint(idx - 1), cl_packed_sizeof(T), ref) + @check err return k end From 6f57e6871c593de545baa908304e28a448eff61f Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Wed, 25 Oct 2017 13:08:35 +0200 Subject: [PATCH 02/12] add documentations and tests --- src/kernel.jl | 14 ++++++++++++ test/runtests.jl | 12 +++++------ test/test_kernel.jl | 52 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index 8125d8ef..a54ceb41 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -96,6 +96,14 @@ function contains_different_layout(::Type{NTuple{3, T}}) where T <: Union{Float3 true end + +""" + contains_different_layout(T) + +Empty types and NTuple{3, CLNumber} have different layouts and need to be replaced +(Where `CLNumber <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}`) +TODO: Float16 + Int16 should also be in CLNumbers +""" @generated function contains_different_layout(::Type{T}) where T :($(_contains_different_layout(T))) end @@ -106,6 +114,12 @@ function struct2tuple(x::T) where T end end +""" + replace_different_layout(x::T) where T + +Replaces types with a layout different from OpenCL. +See [contains_different_layout(T)](@ref) for information what types those are! +""" function replace_different_layout(x::T) where T !contains_different_layout(T) && return x if nfields(x) == 0 diff --git a/test/runtests.jl b/test/runtests.jl index 8f803dc7..405d3b2c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,14 +5,12 @@ using OpenCL @testset "aligned convert" begin x = ((10f0, 1f0, 2f0), (10f0, 1f0, 2f0), (10f0, 1f0, 2f0)) - x_aligned = cl.packed_convert(x) + clx = cl.replace_different_layout(x) - @test x_aligned == ((10f0, 1f0, 2f0), cl.Pad{4}(), (10f0, 1f0, 2f0), cl.Pad{4}(), (10f0, 1f0, 2f0), cl.Pad{4}()) - x_aligned_t = cl.packed_convert(typeof(x)) - @test x_aligned_t == typeof(x_aligned) - - x = cl.packed_convert(77f0) - @test x == 77f0 + @test clx == ((10f0, 1f0, 2f0, 0f0), (10f0, 1f0, 2f0, 0f0), (10f0, 1f0, 2f0, 0f0)) + x = (nothing, nothing, nothing) + clx = cl.replace_different_layout(x) + @test clx == (0,0,0) end function create_test_buffer() diff --git a/test/test_kernel.jl b/test/test_kernel.jl index bdbb5f81..0a7876f4 100644 --- a/test/test_kernel.jl +++ b/test/test_kernel.jl @@ -1,3 +1,9 @@ +struct CLTestStruct + f1::NTuple{3, Float32} + f2::Void + f3::Float32 +end + @testset "OpenCL.Kernel" begin test_source = " @@ -185,4 +191,50 @@ @test r[1] == 4 end end + + test_source = " + //packed + struct __attribute__((packed)) Test{ + float3 f1; + int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) + // you might need to define the alignement of fields to match julia's layout + float f3; // for the types used here the alignement matches though! + }; + // this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((align (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! + }; + + __kernel void structest(__global float *out, struct Test a, struct Test2 b){ + out[0] = a.f1.x; + out[1] = a.f1.y; + out[2] = a.f1.z; + out[3] = a.f3; + + out[4] = b.f1; + out[5] = b.f2; + } + " + + @testset "OpenCL.Kernel convert structs" begin + for device in cl.devices() + if device[:platform][:name] == "Portable Computing Language" + warn("Skipping OpenCL.Kernel constructor for " * + "Portable Computing Language Platform") + continue + end + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, :w, 6) + astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) + bstruct = (1, Int32(4)) + structkernel[queue, (1,)](out, astruct, bstruct) + r = cl.read(queue, out) + @test r == [1f0, 2f0, 3f0, 22f0, 1f0, 4f0] + end + end end From caa7b19b2578da84848531f6e231ad1e45732d8f Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Wed, 25 Oct 2017 14:15:00 +0200 Subject: [PATCH 03/12] more helpful error message --- src/kernel.jl | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/kernel.jl b/src/kernel.jl index a54ceb41..5b038e92 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -169,7 +169,28 @@ function set_arg!{T}(k::Kernel, idx::Integer, arg::T) ref, tsize = to_cl_ref(arg) err = api.clSetKernelArg(k.id, cl_uint(idx - 1), tsize, ref) if err == CL_INVALID_ARG_SIZE - error("Julia and OpenCL type don't match at kernel argument $idx: Found $T") + error(""" + Julia and OpenCL type don't match at kernel argument $idx: Found $T. + Please make sure to define OpenCL structs correctly! + You should be generally fine by using `__attribute__((packed))`, but sometimes the alignment of fields is different from Julia. + Consider the following example: + ``` + //packed + // Tuple{NTuple{3, Float32}, Void, Float32} + struct __attribute__((packed)) Test{ + float3 f1; + int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) + // you might need to define the alignement of fields to match julia's layout + float f3; // for the types used here the alignement matches though! + }; + // this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely + // Tuple{Int64, Int32} + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((align (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! + }; + ``` + """) end @check err return k From 88d5c9d4bc86f44f01f1f01b2ba8c9de75cd40d8 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Wed, 25 Oct 2017 14:17:28 +0200 Subject: [PATCH 04/12] 0.5 compat --- src/kernel.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index 5b038e92..51005e64 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -83,7 +83,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::LocalMem) return k end -function _contains_different_layout(::Type{T}) where T +function _contains_different_layout{T}(::Type{T}) sizeof(T) == 0 && return true nfields(T) == 0 && return false for fname in fieldnames(T) @@ -92,7 +92,7 @@ function _contains_different_layout(::Type{T}) where T return false end -function contains_different_layout(::Type{NTuple{3, T}}) where T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64} +function contains_different_layout{T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}}(::Type{NTuple{3, T}}) true end @@ -104,11 +104,11 @@ Empty types and NTuple{3, CLNumber} have different layouts and need to be replac (Where `CLNumber <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}`) TODO: Float16 + Int16 should also be in CLNumbers """ -@generated function contains_different_layout(::Type{T}) where T +@generated function contains_different_layout{T}(::Type{T}) :($(_contains_different_layout(T))) end -function struct2tuple(x::T) where T +function struct2tuple{T}(x::T) ntuple(Val{nfields(T)}) do i getfield(x, i) end @@ -120,7 +120,7 @@ end Replaces types with a layout different from OpenCL. See [contains_different_layout(T)](@ref) for information what types those are! """ -function replace_different_layout(x::T) where T +function replace_different_layout{T}(x::T) !contains_different_layout(T) && return x if nfields(x) == 0 replace_different_layout((), (x,)) @@ -131,8 +131,8 @@ function replace_different_layout(x::T) where T end end -replace_different_layout(red::NTuple{N, Any}, rest::Tuple{}) where N = red -function replace_different_layout(red::NTuple{N, Any}, rest) where N +replace_different_layout{N}(red::NTuple{N, Any}, rest::Tuple{}) = red +function replace_different_layout{N}(red::NTuple{N, Any}, rest) elem1 = first(rest) T = typeof(elem1) repl = if sizeof(T) == 0 && nfields(T) == 0 @@ -147,12 +147,12 @@ end # TODO UInt16/Float16? # Handle different sizes of OpenCL Vec3, which doesn't agree with julia -function replace_different_layout(arg::NTuple{3, T}) where T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64} +function replace_different_layout{T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}}(arg::NTuple{3, T}) pad = T(0) (arg..., pad) end -function to_cl_ref(arg::T) where T +function to_cl_ref{T}(arg::T) if !Base.datatype_pointerfree(T) error("Types should not contain pointers: $T") end From 6144ac038eca31bea2ce5e9a5964797b1be19935 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Wed, 25 Oct 2017 15:57:20 +0200 Subject: [PATCH 05/12] fix 0.5 --- test/test_kernel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_kernel.jl b/test/test_kernel.jl index 0a7876f4..b2520515 100644 --- a/test/test_kernel.jl +++ b/test/test_kernel.jl @@ -1,4 +1,4 @@ -struct CLTestStruct +immutable CLTestStruct f1::NTuple{3, Float32} f2::Void f3::Float32 From 10344790c03fdabe299d522a2783eaa4fd6e7013 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Fri, 27 Oct 2017 15:07:29 +0200 Subject: [PATCH 06/12] correct spelling mistake --- src/kernel.jl | 3 ++- test/test_kernel.jl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index 51005e64..ec395a2f 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -164,6 +164,7 @@ function to_cl_ref{T}(arg::T) end + function set_arg!{T}(k::Kernel, idx::Integer, arg::T) @assert idx > 0 "Kernel idx must be bigger 0" ref, tsize = to_cl_ref(arg) @@ -187,7 +188,7 @@ function set_arg!{T}(k::Kernel, idx::Integer, arg::T) // Tuple{Int64, Int32} struct __attribute__((packed)) Test2{ long f1; - int __attribute__((align (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! + int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! }; ``` """) diff --git a/test/test_kernel.jl b/test/test_kernel.jl index b2520515..f4b5ac9e 100644 --- a/test/test_kernel.jl +++ b/test/test_kernel.jl @@ -203,7 +203,7 @@ end // this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely struct __attribute__((packed)) Test2{ long f1; - int __attribute__((align (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! + int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! }; __kernel void structest(__global float *out, struct Test a, struct Test2 b){ From a41893153120bd59de1ee5653232b5a271635b54 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Fri, 27 Oct 2017 15:08:22 +0200 Subject: [PATCH 07/12] add datatype_align for more user comfort --- src/kernel.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/kernel.jl b/src/kernel.jl index ec395a2f..d127725c 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -164,6 +164,19 @@ function to_cl_ref{T}(arg::T) end +Base.@pure datatype_align(x::T) where {T} = datatype_align(T) +Base.@pure function datatype_align(::Type{T}) where {T} + # typedef struct { + # uint32_t nfields; + # uint32_t alignment : 9; + # uint32_t haspadding : 1; + # uint32_t npointers : 20; + # uint32_t fielddesc_type : 2; + # } jl_datatype_layout_t; + field = T.layout + sizeof(UInt32) + unsafe_load(convert(Ptr{UInt16}, field)) & convert(Int16, 2^9-1) +end + function set_arg!{T}(k::Kernel, idx::Integer, arg::T) @assert idx > 0 "Kernel idx must be bigger 0" @@ -191,6 +204,7 @@ function set_arg!{T}(k::Kernel, idx::Integer, arg::T) int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! }; ``` + You can use `c.datatype_align(T)` to figure out the alignment of a Julia type! """) end @check err From f182af93f707d843e313b79b7c3468dae5c0c3da Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Fri, 27 Oct 2017 15:54:35 +0200 Subject: [PATCH 08/12] debug osx - make sure no errors are hidden --- test/runtests.jl | 50 +++++++++++++++++++++++++++++++++++++++++++++ test/test_kernel.jl | 6 +----- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 405d3b2c..4a85855a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,56 @@ module TestOpenCL using Base.Test using OpenCL +immutable CLTestStruct + f1::NTuple{3, Float32} + f2::Void + f3::Float32 +end +test_source = " +//packed +struct __attribute__((packed)) Test{ + float3 f1; + int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) + // you might need to define the alignement of fields to match julia's layout + float f3; // for the types used here the alignement matches though! +}; + +// this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely +struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! +}; + +__kernel void structest(__global float *out, struct Test a, struct Test2 b){ + out[0] = a.f1.x; + out[1] = a.f1.y; + out[2] = a.f1.z; + out[3] = a.f3; + + out[4] = b.f1; + out[5] = b.f2; +} +" + +for device in cl.devices() + if device[:platform][:name] == "Portable Computing Language" + warn("Skipping OpenCL.Kernel constructor for " * + "Portable Computing Language Platform") + continue + end + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, :w, 6) + astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) + bstruct = (1, Int32(4)) + structkernel[queue, (1,)](out, astruct, bstruct) + r = cl.read(queue, out) + @assert r == [1f0, 2f0, 3f0, 22f0, 1f0, 4f0] + println("passed test for $device") +end @testset "aligned convert" begin x = ((10f0, 1f0, 2f0), (10f0, 1f0, 2f0), (10f0, 1f0, 2f0)) diff --git a/test/test_kernel.jl b/test/test_kernel.jl index f4b5ac9e..e14dfae2 100644 --- a/test/test_kernel.jl +++ b/test/test_kernel.jl @@ -1,8 +1,4 @@ -immutable CLTestStruct - f1::NTuple{3, Float32} - f2::Void - f3::Float32 -end + @testset "OpenCL.Kernel" begin From a0a5cd4b61662255326808f57350db93f82acaf2 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Fri, 27 Oct 2017 19:24:21 +0200 Subject: [PATCH 09/12] osx debug - check sizes --- test/runtests.jl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 4a85855a..116478b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,14 +22,11 @@ struct __attribute__((packed)) Test2{ int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! }; -__kernel void structest(__global float *out, struct Test a, struct Test2 b){ - out[0] = a.f1.x; - out[1] = a.f1.y; - out[2] = a.f1.z; - out[3] = a.f3; - - out[4] = b.f1; - out[5] = b.f2; +__kernel void structest(__global float *out){ + struct Test a; + struct Test2 b; + out[0] = sizeof(a); + out[1] = sizeof(b); } " @@ -39,6 +36,7 @@ for device in cl.devices() "Portable Computing Language Platform") continue end + println(device, ": ") ctx = cl.Context(device) prg = cl.Program(ctx, source = test_source) queue = cl.CmdQueue(ctx) @@ -47,10 +45,9 @@ for device in cl.devices() out = cl.Buffer(Float32, ctx, :w, 6) astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) bstruct = (1, Int32(4)) - structkernel[queue, (1,)](out, astruct, bstruct) + structkernel[queue, (1,)](out) r = cl.read(queue, out) - @assert r == [1f0, 2f0, 3f0, 22f0, 1f0, 4f0] - println("passed test for $device") + println(r[1:2]) end @testset "aligned convert" begin From 25aa8f4c03410f6644740932ecf0e486ca108e10 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Sat, 28 Oct 2017 19:47:03 +0200 Subject: [PATCH 10/12] break up example --- test/runtests.jl | 57 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 116478b7..f30668ae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,11 +2,44 @@ module TestOpenCL using Base.Test using OpenCL + + +test_source = " +struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; +}; + +__kernel void structest(__global float *out, struct Test2 b){ + out[0] = b.f1; + out[1] = b.f2; +} +" +for device in cl.devices() + if device[:platform][:name] == "Portable Computing Language" + warn("Skipping OpenCL.Kernel constructor for " * + "Portable Computing Language Platform") + continue + end + println(device, ": ") + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, :w, 6) + bstruct = (1, Int32(4)) + structkernel[queue, (1,)](out, bstruct) + r = cl.read(queue, out) + println(r[1:2]) +end + immutable CLTestStruct f1::NTuple{3, Float32} f2::Void f3::Float32 end + test_source = " //packed struct __attribute__((packed)) Test{ @@ -16,17 +49,11 @@ struct __attribute__((packed)) Test{ float f3; // for the types used here the alignement matches though! }; -// this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely -struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! -}; - -__kernel void structest(__global float *out){ - struct Test a; - struct Test2 b; - out[0] = sizeof(a); - out[1] = sizeof(b); +__kernel void structest(__global float *out, struct Test a){ + out[0] = a.f1.x; + out[1] = a.f1.y; + out[2] = a.f1.z; + out[3] = a.f3; } " @@ -36,18 +63,16 @@ for device in cl.devices() "Portable Computing Language Platform") continue end - println(device, ": ") ctx = cl.Context(device) prg = cl.Program(ctx, source = test_source) queue = cl.CmdQueue(ctx) cl.build!(prg) structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 6) + out = cl.Buffer(Float32, ctx, :w, 4) astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) - bstruct = (1, Int32(4)) - structkernel[queue, (1,)](out) + structkernel[queue, (1,)](out, astruct) r = cl.read(queue, out) - println(r[1:2]) + @test r == [1f0, 2f0, 3f0, 22f0] end @testset "aligned convert" begin From e27eb7a026b8bebfb53bde9481ed279f25b4db91 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Sat, 28 Oct 2017 20:07:18 +0200 Subject: [PATCH 11/12] add osx problem to other PR --- test/runtests.jl | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index f30668ae..e14cb9a1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,37 +3,6 @@ using Base.Test using OpenCL - -test_source = " -struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; -}; - -__kernel void structest(__global float *out, struct Test2 b){ - out[0] = b.f1; - out[1] = b.f2; -} -" -for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - println(device, ": ") - ctx = cl.Context(device) - prg = cl.Program(ctx, source = test_source) - queue = cl.CmdQueue(ctx) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 6) - bstruct = (1, Int32(4)) - structkernel[queue, (1,)](out, bstruct) - r = cl.read(queue, out) - println(r[1:2]) -end - immutable CLTestStruct f1::NTuple{3, Float32} f2::Void From 9cc2710c6c535a30b54a62277029541e3fe2f475 Mon Sep 17 00:00:00 2001 From: SimonDanisch Date: Sat, 28 Oct 2017 23:20:56 +0200 Subject: [PATCH 12/12] don't test problematic kernel on problematic osx --- test/runtests.jl | 43 +----------------------- test/test_kernel.jl | 82 +++++++++++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 71 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e14cb9a1..2a785d96 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,48 +3,7 @@ using Base.Test using OpenCL -immutable CLTestStruct - f1::NTuple{3, Float32} - f2::Void - f3::Float32 -end - -test_source = " -//packed -struct __attribute__((packed)) Test{ - float3 f1; - int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL) - // you might need to define the alignement of fields to match julia's layout - float f3; // for the types used here the alignement matches though! -}; - -__kernel void structest(__global float *out, struct Test a){ - out[0] = a.f1.x; - out[1] = a.f1.y; - out[2] = a.f1.z; - out[3] = a.f3; -} -" - -for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source = test_source) - queue = cl.CmdQueue(ctx) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 4) - astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) - structkernel[queue, (1,)](out, astruct) - r = cl.read(queue, out) - @test r == [1f0, 2f0, 3f0, 22f0] -end - -@testset "aligned convert" begin +@testset "layout" begin x = ((10f0, 1f0, 2f0), (10f0, 1f0, 2f0), (10f0, 1f0, 2f0)) clx = cl.replace_different_layout(x) diff --git a/test/test_kernel.jl b/test/test_kernel.jl index e14dfae2..5b5fbcbd 100644 --- a/test/test_kernel.jl +++ b/test/test_kernel.jl @@ -1,4 +1,8 @@ - +immutable CLTestStruct + f1::NTuple{3, Float32} + f2::Void + f3::Float32 +end @testset "OpenCL.Kernel" begin @@ -188,6 +192,38 @@ end end + + test_source = " + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; + }; + __kernel void structest(__global float *out, struct Test2 b){ + out[0] = b.f1; + out[1] = b.f2; + } + " + for device in cl.devices() + if device[:platform][:name] == "Portable Computing Language" + warn("Skipping OpenCL.Kernel constructor for " * + "Portable Computing Language Platform") + continue + end + if is_apple() + continue + end + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, :w, 2) + bstruct = (1, Int32(4)) + structkernel[queue, (1,)](out, bstruct) + r = cl.read(queue, out) + @test r == [1f0, 4f0] + end + test_source = " //packed struct __attribute__((packed)) Test{ @@ -196,41 +232,29 @@ // you might need to define the alignement of fields to match julia's layout float f3; // for the types used here the alignement matches though! }; - // this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely - struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8! - }; - - __kernel void structest(__global float *out, struct Test a, struct Test2 b){ + __kernel void structest(__global float *out, struct Test a){ out[0] = a.f1.x; out[1] = a.f1.y; out[2] = a.f1.z; out[3] = a.f3; - - out[4] = b.f1; - out[5] = b.f2; } " - @testset "OpenCL.Kernel convert structs" begin - for device in cl.devices() - if device[:platform][:name] == "Portable Computing Language" - warn("Skipping OpenCL.Kernel constructor for " * - "Portable Computing Language Platform") - continue - end - ctx = cl.Context(device) - prg = cl.Program(ctx, source = test_source) - queue = cl.CmdQueue(ctx) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = cl.Buffer(Float32, ctx, :w, 6) - astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) - bstruct = (1, Int32(4)) - structkernel[queue, (1,)](out, astruct, bstruct) - r = cl.read(queue, out) - @test r == [1f0, 2f0, 3f0, 22f0, 1f0, 4f0] + for device in cl.devices() + if device[:platform][:name] == "Portable Computing Language" + warn("Skipping OpenCL.Kernel constructor for " * + "Portable Computing Language Platform") + continue end + ctx = cl.Context(device) + prg = cl.Program(ctx, source = test_source) + queue = cl.CmdQueue(ctx) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = cl.Buffer(Float32, ctx, :w, 4) + astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0) + structkernel[queue, (1,)](out, astruct) + r = cl.read(queue, out) + @test r == [1f0, 2f0, 3f0, 22f0] end end