JuliaGPU · vchuravy · Apr 29, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024
diff --git a/examples/matmul.jl b/examples/matmul.jl
@@ -2,34 +2,34 @@ using KernelAbstractions, Test, Random
 include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # Load backend
 
 # Simple kernel for matrix multiplication
-@kernel function matmul_kernel!(a, b, c)
+@kernel function matmul_kernel!(output, a, b)  # writes output; reads a and b
     i, j = @index(Global, NTuple)
 
     # creating a temporary sum variable for matrix multiplication
-    tmp_sum = zero(eltype(c))
+    tmp_sum = zero(eltype(output))  # accumulator starts at zero in output's element type
     for k = 1:size(a)[2]
         tmp_sum += a[i,k] * b[k, j]
     end
 
-    c[i,j] = tmp_sum
+    output[i,j] = tmp_sum  # store the finished dot product for index (i, j)
 end
 
 # Creating a wrapper kernel for launching with error checks
-function matmul!(a, b, c)
+function matmul!(output, a, b)  # the array that gets written comes first
     if size(a)[2] != size(b)[1]
         println("Matrix size mismatch!")
         return nothing
     end
     backend = KernelAbstractions.get_backend(a)
     kernel! = matmul_kernel!(backend)
-    kernel!(a, b, c, ndrange=size(c)) 
+    kernel!(output, a, b, ndrange=size(output))  # index range spans all of output
 end
 
 a = rand!(allocate(backend, Float32, 256, 123))
 b = rand!(allocate(backend, Float32, 123, 45))
-c = KernelAbstractions.zeros(backend, Float32, 256, 45)
+output = KernelAbstractions.zeros(backend, Float32, 256, 45)  # size(a, 1) x size(b, 2)
 
-matmul!(a,b,c)
+matmul!(output, a, b)
 KernelAbstractions.synchronize(backend)
 
-@test isapprox(c, a*b)
+@test isapprox(output, a*b)  # check the kernel result against a*b
diff --git a/examples/naive_transpose.jl b/examples/naive_transpose.jl
@@ -3,7 +3,7 @@ include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) #
 
 @kernel function naive_transpose_kernel!(a, b)
     i, j = @index(Global, NTuple)
-    @inbounds b[i, j] = a[j, i]
+    @inbounds a[i, j] = b[j, i]  # a is now the destination and b the source
 end
 
 # create wrapper function to check inputs
@@ -24,8 +24,8 @@ end
 res = 1024
 
 # creating initial arrays
-a = rand!(allocate(backend, Float32, res, res))
-b = KernelAbstractions.zeros(backend, Float32, res, res)
+b = rand!(allocate(backend, Float32, res, res))  # res x res array filled by rand!
+a = KernelAbstractions.zeros(backend, Float32, res, res)  # zeros; presumably the output - TODO confirm
 
 naive_transpose!(a,b)
 KernelAbstractions.synchronize(backend)