JuliaGPU · maleadt · Jun 16, 2023 · Jun 14, 2023 · Jun 14, 2023 · Jun 16, 2023
diff --git a/Project.toml b/Project.toml
@@ -10,7 +10,6 @@ CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
 CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc"
 CUDA_Runtime_Discovery = "1af6417a-86b4-443c-805f-a4643ffb695f"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
-CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
 GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"

diff --git a/lib/cusparse/linalg.jl b/lib/cusparse/linalg.jl
@@ -16,7 +16,7 @@ function sum_dim1(A::CuSparseMatrixCSR{T}) where {T}
     m, n = size(A)
     rowsum = CuVector{Float64}(undef, m)
     kernel_f = @cuda launch=false kernel(T, rowsum, A)
-    
+
     config = launch_configuration(kernel_f.fun)
     threads = min(n, config.threads)
     blocks = cld(n, threads)
@@ -40,7 +40,7 @@ function sum_dim2(A::CuSparseMatrixCSR{T}) where {T}
     m, n = size(A)
     colsum = CuVector{Float64}(undef, n)
     kernel_f = @cuda launch=false kernel(T, colsum, A)
-    
+
     config = launch_configuration(kernel_f.fun)
     threads = min(m, config.threads)
     blocks = cld(m, threads)
@@ -97,7 +97,7 @@ function SparseArrays.droptol!(A::CuSparseMatrixCOO, tol::Real)
     copyto!(A, B)
 end
 
-function Base.reshape(A::CuSparseMatrixCOO, dims::NTuple{N,Int}) where {N}
+function Base.reshape(A::CuSparseMatrixCOO, dims::Dims)
     nrows, ncols = size(A)
     flat_indices = nrows .* (A.colInd .- 1) .+ A.rowInd .- 1
     new_col, new_row = div.(flat_indices, dims[1]) .+ 1, rem.(flat_indices, dims[1]) .+ 1
@@ -125,7 +125,7 @@ function LinearAlgebra.kron(A::CuSparseMatrixCOO{T}, B::CuSparseMatrixCOO{T}) wh
     col .+= repeat(B.colInd .- 1, outer = Annz) .+ 1
 
     data .*= repeat(B.nzVal, outer = Annz)
-    
+
     sparse(row, col, data, out_shape..., fmt = :coo)
 end
 
@@ -150,7 +150,7 @@ function LinearAlgebra.kron(A::CuSparseMatrixCOO{T}, B::Diagonal) where {T}
     col .+= CuVector(repeat(0:nB-1, outer = Annz)) .+ 1
 
     data .*= repeat(CUDA.ones(T, nB), outer = Annz)
-    
+
     sparse(row, col, data, out_shape..., fmt = :coo)
 end
 
@@ -175,62 +175,62 @@ function LinearAlgebra.kron(A::Diagonal, B::CuSparseMatrixCOO{T}) where {T}
     col .+= repeat(B.colInd .- 1, outer = Annz) .+ 1
 
     data .*= repeat(B.nzVal, outer = Annz)
-    
+
     sparse(row, col, data, out_shape..., fmt = :coo)
 end
 
 for SparseMatrixType in [:CuSparseMatrixCSC, :CuSparseMatrixCSR]
     @eval begin
-        LinearAlgebra.triu(A::$SparseMatrixType{T,M}, k::Integer) where {T,M} = 
+        LinearAlgebra.triu(A::$SparseMatrixType{T}, k::Integer) where {T} =
             $SparseMatrixType( triu(CuSparseMatrixCOO(A), k) )
-        LinearAlgebra.triu(A::Transpose{T,<:$SparseMatrixType}, k::Integer) where {T} = 
+        LinearAlgebra.triu(A::Transpose{T,<:$SparseMatrixType}, k::Integer) where {T} =
             $SparseMatrixType( triu(CuSparseMatrixCOO(_sptranspose(parent(A))), k) )
-        LinearAlgebra.triu(A::Adjoint{T,<:$SparseMatrixType}, k::Integer) where {T} = 
+        LinearAlgebra.triu(A::Adjoint{T,<:$SparseMatrixType}, k::Integer) where {T} =
             $SparseMatrixType( triu(CuSparseMatrixCOO(_spadjoint(parent(A))), k) )
-        
-        LinearAlgebra.tril(A::$SparseMatrixType{T,M}, k::Integer) where {T,M} = 
+
+        LinearAlgebra.tril(A::$SparseMatrixType{T}, k::Integer) where {T} =
             $SparseMatrixType( tril(CuSparseMatrixCOO(A), k) )
-        LinearAlgebra.tril(A::Transpose{T,<:$SparseMatrixType}, k::Integer) where {T} = 
+        LinearAlgebra.tril(A::Transpose{T,<:$SparseMatrixType}, k::Integer) where {T} =
             $SparseMatrixType( tril(CuSparseMatrixCOO(_sptranspose(parent(A))), k) )
-        LinearAlgebra.tril(A::Adjoint{T,<:$SparseMatrixType}, k::Integer) where {T} = 
+        LinearAlgebra.tril(A::Adjoint{T,<:$SparseMatrixType}, k::Integer) where {T} =
             $SparseMatrixType( tril(CuSparseMatrixCOO(_spadjoint(parent(A))), k) )
-        
-        LinearAlgebra.triu(A::Union{$SparseMatrixType{T,M}, Transpose{T,<:$SparseMatrixType}, Adjoint{T,<:$SparseMatrixType}}) where {T,M} = 
+
+        LinearAlgebra.triu(A::Union{$SparseMatrixType{T}, Transpose{T,<:$SparseMatrixType}, Adjoint{T,<:$SparseMatrixType}}) where {T} =
             $SparseMatrixType( triu(CuSparseMatrixCOO(A), 0) )
-        LinearAlgebra.tril(A::Union{$SparseMatrixType{T,M}, Transpose{T,<:$SparseMatrixType}, Adjoint{T,<:$SparseMatrixType}}) where {T,M} = 
+        LinearAlgebra.tril(A::Union{$SparseMatrixType{T}, Transpose{T,<:$SparseMatrixType}, Adjoint{T,<:$SparseMatrixType}}) where {T} =
             $SparseMatrixType( tril(CuSparseMatrixCOO(A), 0) )
 
-        LinearAlgebra.kron(A::$SparseMatrixType{T,M}, B::$SparseMatrixType{T,M}) where {T,M} = 
+        LinearAlgebra.kron(A::$SparseMatrixType{T}, B::$SparseMatrixType{T}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(A), CuSparseMatrixCOO(B)) )
-        LinearAlgebra.kron(A::$SparseMatrixType{T,M}, B::Diagonal) where {T,M} = 
+        LinearAlgebra.kron(A::$SparseMatrixType{T}, B::Diagonal) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(A), B) )
-        LinearAlgebra.kron(A::Diagonal, B::$SparseMatrixType{T,M}) where {T,M} = 
+        LinearAlgebra.kron(A::Diagonal, B::$SparseMatrixType{T}) where {T} =
             $SparseMatrixType( kron(A, CuSparseMatrixCOO(B)) )
-        
-        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::$SparseMatrixType{T,M}) where {T,M} = 
+
+        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::$SparseMatrixType{T}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_sptranspose(parent(A))), CuSparseMatrixCOO(B)) )
-        LinearAlgebra.kron(A::$SparseMatrixType{T,M}, B::Transpose{T,<:$SparseMatrixType}) where {T,M} = 
+        LinearAlgebra.kron(A::$SparseMatrixType{T}, B::Transpose{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(A), CuSparseMatrixCOO(_sptranspose(parent(B)))) )
-        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::Transpose{T,<:$SparseMatrixType}) where {T} = 
+        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::Transpose{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_sptranspose(parent(A))), CuSparseMatrixCOO(_sptranspose(parent(B)))) )
-        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::Diagonal) where {T} = 
+        LinearAlgebra.kron(A::Transpose{T,<:$SparseMatrixType}, B::Diagonal) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_sptranspose(parent(A))), B) )
-        LinearAlgebra.kron(A::Diagonal, B::Transpose{T,<:$SparseMatrixType}) where {T} = 
+        LinearAlgebra.kron(A::Diagonal, B::Transpose{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(A, CuSparseMatrixCOO(_sptranspose(parent(B)))) )
 
-        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::$SparseMatrixType{T,M}) where {T,M} = 
+        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::$SparseMatrixType{T}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_spadjoint(parent(A))), CuSparseMatrixCOO(B)) )
-        LinearAlgebra.kron(A::$SparseMatrixType{T,M}, B::Adjoint{T,<:$SparseMatrixType}) where {T,M} = 
+        LinearAlgebra.kron(A::$SparseMatrixType{T}, B::Adjoint{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(A), CuSparseMatrixCOO(_spadjoint(parent(B)))) )
-        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::Adjoint{T,<:$SparseMatrixType}) where {T} = 
+        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::Adjoint{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_spadjoint(parent(A))), CuSparseMatrixCOO(_spadjoint(parent(B)))) )
-        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::Diagonal) where {T} = 
+        LinearAlgebra.kron(A::Adjoint{T,<:$SparseMatrixType}, B::Diagonal) where {T} =
             $SparseMatrixType( kron(CuSparseMatrixCOO(_spadjoint(parent(A))), B) )
-        LinearAlgebra.kron(A::Diagonal, B::Adjoint{T,<:$SparseMatrixType}) where {T} = 
+        LinearAlgebra.kron(A::Diagonal, B::Adjoint{T,<:$SparseMatrixType}) where {T} =
             $SparseMatrixType( kron(A, CuSparseMatrixCOO(_spadjoint(parent(B)))) )
 
 
-        function Base.reshape(A::$SparseMatrixType, dims::NTuple{N,Int}) where {N}
+        function Base.reshape(A::$SparseMatrixType, dims::Dims)
             B = CuSparseMatrixCOO(A)
             $SparseMatrixType(reshape(B, dims))
         end
@@ -244,16 +244,16 @@ for SparseMatrixType in [:CuSparseMatrixCSC, :CuSparseMatrixCSR]
         function LinearAlgebra.exp(A::$SparseMatrixType; threshold = 1e-7, nonzero_tol = 1e-14)
             rows = LinearAlgebra.checksquare(A) # Throws exception if not square
             typeA = eltype(A)
-        
+
             mat_norm = norm(A, Inf)
             scaling_factor = nextpow(2, mat_norm) # Native routine, faster
             A = A ./ scaling_factor
             delta = 1
-        
+
             P = $SparseMatrixType(spdiagm(0 => ones(eltype(A), rows)))
             next_term = P
             n = 1
-        
+
             while delta > threshold
                 next_term = typeA(1 / n) * A * next_term
                 droptol!(next_term, nonzero_tol)

diff --git a/src/array.jl b/src/array.jl
@@ -237,9 +237,26 @@ array) or a tuple of the array dimensions. `own` optionally specified whether Ju
 take ownership of the memory, calling `cudaFree` when the array is no longer referenced. The
 `ctx` argument determines the CUDA context where the data is allocated in.
 """
-function Base.unsafe_wrap(::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,N}},Type{CuArray{T,N,B}}},
+function Base.unsafe_wrap(::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,N}}},
+                          ptr::CuPtr{T}, dims::NTuple{N,Int};
+                          own::Bool=false, ctx::CuContext=context()) where {T,N}
+  wrapped = _unsafe_wrap(T, ptr, dims; own=own, ctx=ctx)
+  # the second `ArrayStorage` argument is 1 when `own` is set and -1 otherwise
+  return CuArray{T,N}(ArrayStorage(wrapped, own ? 1 : -1), dims)
+end
+function Base.unsafe_wrap(::Type{CuArray{T,N,B}},
                           ptr::CuPtr{T}, dims::NTuple{N,Int};
                           own::Bool=false, ctx::CuContext=context()) where {T,N,B}
+  wrapped = _unsafe_wrap(T, ptr, dims; own=own, ctx=ctx)
+  # the caller named a buffer type `B`, so the detected one has to be exactly that
+  typeof(wrapped) === B ||
+    error("Declared buffer type does not match inferred buffer type.")
+  # the second `ArrayStorage` argument is 1 when `own` is set and -1 otherwise
+  return CuArray{T,N}(ArrayStorage(wrapped, own ? 1 : -1), dims)
+end
+
+function _unsafe_wrap(::Type{T}, ptr::CuPtr{T}, dims::NTuple{N,Int};
+                      own::Bool=false, ctx::CuContext=context()) where {T,N}
   isbitstype(T) || error("Can only unsafe_wrap a pointer to a bits type")
   sz = prod(dims) * sizeof(T)
 
@@ -259,16 +276,15 @@ function Base.unsafe_wrap(::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,
   catch err
       error("Could not identify the buffer type; are you passing a valid CUDA pointer to unsafe_wrap?")
   end
-
-  if @isdefined(B) && typeof(buf) !== B
-    error("Declared buffer type does not match inferred buffer type.")
-  end
-
-  storage = ArrayStorage(buf, own ? 1 : -1)
-  CuArray{T, length(dims)}(storage, dims)
+  return buf
 end
 
-function Base.unsafe_wrap(Atype::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,1}},Type{CuArray{T,1,B}}},
+function Base.unsafe_wrap(Atype::Union{Type{CuArray},Type{CuArray{T}},Type{CuArray{T,1}}},
+                          p::CuPtr{T}, dim::Int;
+                          own::Bool=false, ctx::CuContext=context()) where {T}
+  return unsafe_wrap(Atype, p, (dim,); own=own, ctx=ctx)  # forward the length as a 1-tuple
+end
+function Base.unsafe_wrap(Atype::Type{CuArray{T,1,B}},
                           p::CuPtr{T}, dim::Int;
                           own::Bool=false, ctx::CuContext=context()) where {T,B}
   unsafe_wrap(Atype, p, (dim,); own, ctx)