From 9aface197eec36880a0d01aafedb46d23b611375 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang
Date: Fri, 10 May 2019 11:46:50 -0700
Subject: [PATCH] fixed issues after rebasing

---
 cpp/src/COOtoCSR.cuh         |  4 +--
 cpp/src/cugraph.cu           |  4 +--
 cpp/src/graph_utils.cuh      | 60 +++++++++++++++---------------------
 cpp/src/grmat.cu             |  2 +-
 cpp/src/renumber.cuh         |  2 +-
 cpp/src/snmg/pagerank.cuh    |  6 ++--
 cpp/src/snmg/spmv.cuh        |  4 +--
 cpp/src/two_hop_neighbors.cu |  2 +-
 8 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/cpp/src/COOtoCSR.cuh b/cpp/src/COOtoCSR.cuh
index 0324062db32..f00b352f0e4 100644
--- a/cpp/src/COOtoCSR.cuh
+++ b/cpp/src/COOtoCSR.cuh
@@ -74,7 +74,7 @@ gdf_error ConvertCOOtoCSR(T* sources, T* destinations, int64_t nnz, CSR_Result
diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu
--- a/cpp/src/cugraph.cu
+++ b/cpp/src/cugraph.cu
     if (col->data != nullptr) {
       ALLOC_FREE_TRY(col->data, stream);
     }
@@ -599,7 +599,7 @@ gdf_error gdf_louvain(gdf_graph *graph, void *final_modularity, void *num_level,
     value_ptr = graph->adjList->edge_data->data;
   }
   else {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     d_values.resize(graph->adjList->indices->size);
     thrust::fill(rmm::exec_policy(stream)->on(stream), d_values.begin(), d_values.end(), 1.0);
     value_ptr = (void * ) thrust::raw_pointer_cast(d_values.data());
diff --git a/cpp/src/graph_utils.cuh b/cpp/src/graph_utils.cuh
index ea30cfc1857..b173cb61e8e 100644
--- a/cpp/src/graph_utils.cuh
+++ b/cpp/src/graph_utils.cuh
@@ -39,20 +39,6 @@ namespace cugraph
 #define DEFAULT_MASK 0xffffffff
 #define US
 
-//error check
-#ifdef DEBUG
-#define WHERE " at: " << __FILE__ << ':' << __LINE__
-#define cudaCheckError() { \
-  cudaError_t e=cudaGetLastError(); \
-  if(e!=cudaSuccess) { \
-    std::cerr << "Cuda failure: " << cudaGetErrorString(e) << WHERE << std::endl; \
-  } \
- }
-#else
-#define cudaCheckError()
-#define WHERE ""
-#endif
-
   template <typename T>
   static __device__ __forceinline__ T shfl_up(T r, int offset, int bound = 32, int mask = DEFAULT_MASK) {
 #if __CUDA_ARCH__ >= 300
@@ -128,7 +114,7 @@ namespace cugraph
   //dot
   template <typename T>
   T dot(size_t n, T* x, T* y) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     T result = thrust::inner_product(rmm::exec_policy(stream)->on(stream),
                                      thrust::device_pointer_cast(x),
                                      thrust::device_pointer_cast(x + n),
@@ -153,7 +139,7 @@ namespace cugraph
 
   template <typename T>
   void axpy(size_t n, T a, T* x, T* y) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     thrust::transform(rmm::exec_policy(stream)->on(stream),
                       thrust::device_pointer_cast(x),
                       thrust::device_pointer_cast(x + n),
@@ -174,7 +160,7 @@ namespace cugraph
 
   template <typename T>
   T nrm2(size_t n, T* x) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     T init = 0;
     T result = std::sqrt(thrust::transform_reduce(rmm::exec_policy(stream)->on(stream),
                                                   thrust::device_pointer_cast(x),
@@ -188,7 +174,7 @@ namespace cugraph
 
   template <typename T>
   T nrm1(size_t n, T* x) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     T result = thrust::reduce(rmm::exec_policy(stream)->on(stream),
                               thrust::device_pointer_cast(x),
                               thrust::device_pointer_cast(x + n));
@@ -198,7 +184,7 @@ namespace cugraph
 
   template <typename T>
   void scal(size_t n, T val, T* x) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     thrust::transform(rmm::exec_policy(stream)->on(stream),
                       thrust::device_pointer_cast(x),
                       thrust::device_pointer_cast(x + n),
@@ -208,9 +194,21 @@ namespace cugraph
     cudaCheckError();
   }
 
+  template <typename T>
+  void addv(size_t n, T val, T* x) {
+    cudaStream_t stream {nullptr};
+    thrust::transform(rmm::exec_policy(stream)->on(stream),
+                      thrust::device_pointer_cast(x),
+                      thrust::device_pointer_cast(x + n),
+                      thrust::make_constant_iterator(val),
+                      thrust::device_pointer_cast(x),
+                      thrust::plus<T>());
+    cudaCheckError();
+  }
+
   template <typename T>
   void fill(size_t n, T* x, T value) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     thrust::fill(rmm::exec_policy(stream)->on(stream),
                  thrust::device_pointer_cast(x),
                  thrust::device_pointer_cast(x + n), value);
@@ -231,7 +229,7 @@ namespace cugraph
   void copy(size_t n, T *x, T *res) {
     thrust::device_ptr<T> dev_ptr(x);
     thrust::device_ptr<T> res_ptr(res);
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     thrust::copy_n(rmm::exec_policy(stream)->on(stream), dev_ptr, n, res_ptr);
     cudaCheckError();
   }
@@ -258,7 +256,7 @@ namespace cugraph
 
   template <typename T>
   void update_dangling_nodes(size_t n, T* dangling_nodes, T damping_factor) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     thrust::transform_if(rmm::exec_policy(stream)->on(stream),
                          thrust::device_pointer_cast(dangling_nodes),
                          thrust::device_pointer_cast(dangling_nodes + n),
@@ -275,21 +273,11 @@ namespace cugraph
     for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x)
       atomicAdd(&degree[ind[i]], 1.0);
   }
-  template <typename IndexType, typename ValueType>
-  __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
-  equi_prob(const IndexType n,
-            const IndexType e,
-            const IndexType *ind,
-            ValueType *val,
-            IndexType *degree) {
-    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x)
-      val[i] = 1.0 / degree[ind[i]];
-  }
 
   template <typename IndexType, typename ValueType>
   __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
-  flag_leafs(const IndexType n, IndexType *degree, ValueType *bookmark) {
-    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x)
+  flag_leafs_kernel(const size_t n, const IndexType *degree, ValueType *bookmark) {
+    for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x)
       if (degree[i] == 0)
         bookmark[i] = 1.0;
   }
@@ -383,7 +371,7 @@ namespace cugraph
     nblocks.x = min((n + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS);
     nblocks.y = 1;
     nblocks.z = 1;
-    flag_leafs <<<nblocks, nthreads>>>(n, degree, bookmark);
+    flag_leafs_kernel <<<nblocks, nthreads>>>(n, degree, bookmark);
     cudaCheckError();
     ALLOC_FREE_TRY(degree, stream);
   }
@@ -406,7 +394,7 @@ namespace cugraph
   // This will sort the COO Matrix, row will be sorted and each column of same row will be sorted.
   template <typename IndexType, typename ValueType, typename SizeT>
   void remove_duplicate(IndexType* src, IndexType* dest, ValueType* val, SizeT &nnz) {
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     if (val != NULL) {
       thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream),
                                  thrust::raw_pointer_cast(val),
diff --git a/cpp/src/grmat.cu b/cpp/src/grmat.cu
index cd4523b5da6..8b5a50aacd7 100644
--- a/cpp/src/grmat.cu
+++ b/cpp/src/grmat.cu
@@ -177,7 +177,7 @@ gdf_error main_(gdf_column *src, gdf_column *dest, gdf_column *val, CommandLine
     if (util::SetDevice(gpu_idx[0]))
       return GDF_CUDA_ERROR;
 
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
     ALLOC_TRY((void**)&coo.row, sizeof(VertexId) * rmat_all_edges, stream);
     ALLOC_TRY((void**)&coo.col, sizeof(VertexId) * rmat_all_edges, stream);
     if (val != nullptr)
diff --git a/cpp/src/renumber.cuh b/cpp/src/renumber.cuh
index d59d3df4e50..b5681491776 100644
--- a/cpp/src/renumber.cuh
+++ b/cpp/src/renumber.cuh
@@ -203,7 +203,7 @@ namespace cugraph {
       //  We need 3 for hashing, and one array for data
       //
-      auto stream = cudaStream_t{nullptr};
+      cudaStream_t stream {nullptr};
 
       T_in *hash_data;
diff --git a/cpp/src/snmg/pagerank.cuh b/cpp/src/snmg/pagerank.cuh
index e19f3a855de..e8c4127ed5b 100644
--- a/cpp/src/snmg/pagerank.cuh
+++ b/cpp/src/snmg/pagerank.cuh
@@ -84,8 +84,8 @@ class SNMGpagerank
     e_loc = tmp_e;
     stream = nullptr;
     is_setup = false;
-    ALLOC_MANAGED_TRY ((void**)&bookmark, sizeof(ValueType) * v_glob, stream);
-    ALLOC_MANAGED_TRY ((void**)&val, sizeof(ValueType) * e_loc, stream);
+    ALLOC_TRY ((void**)&bookmark, sizeof(ValueType) * v_glob, stream);
+    ALLOC_TRY ((void**)&val, sizeof(ValueType) * e_loc, stream);
   }
   ~SNMGpagerank() {
     ALLOC_FREE_TRY(bookmark, stream);
@@ -113,7 +113,7 @@ class SNMGpagerank
     alpha=_alpha;
     ValueType zero = 0.0;
     IndexType *degree;
-    ALLOC_MANAGED_TRY ((void**)&degree, sizeof(IndexType) * v_glob, stream);
+    ALLOC_TRY ((void**)&degree, sizeof(IndexType) * v_glob, stream);
 
     // TODO snmg degree
     int nthreads = min(static_cast<int>(e_loc), 256);
diff --git a/cpp/src/snmg/spmv.cuh b/cpp/src/snmg/spmv.cuh
index 5d042a0a1c3..27e15a1b1e3 100644
--- a/cpp/src/snmg/spmv.cuh
+++ b/cpp/src/snmg/spmv.cuh
@@ -65,14 +65,14 @@ class SNMGcsrmv
     e_loc = tmp;
 
     // Allocate the local result
-    ALLOC_MANAGED_TRY ((void**)&y_loc, v_loc*sizeof(ValueType), stream);
+    ALLOC_TRY ((void**)&y_loc, v_loc*sizeof(ValueType), stream);
 
     // get temporary storage size for CUB
     cub::DeviceSpmv::CsrMV(cub_d_temp_storage, cub_temp_storage_bytes,
                            val, off, ind, x[i], y_loc, v_loc, v_glob, e_loc);
     cudaCheckError();
 
     // Allocate CUB's temporary storage
-    ALLOC_MANAGED_TRY ((void**)&cub_d_temp_storage, cub_temp_storage_bytes, stream);
+    ALLOC_TRY ((void**)&cub_d_temp_storage, cub_temp_storage_bytes, stream);
   }
 
   ~SNMGcsrmv() {
diff --git a/cpp/src/two_hop_neighbors.cu b/cpp/src/two_hop_neighbors.cu
index 2f5a576c096..de8bd9bfb0c 100644
--- a/cpp/src/two_hop_neighbors.cu
+++ b/cpp/src/two_hop_neighbors.cu
@@ -39,7 +39,7 @@ gdf_error gdf_get_two_hop_neighbors_impl(IndexType num_verts,
     IndexType num_edges;
     cudaMemcpy(&num_edges, &offsets[num_verts], sizeof(IndexType), cudaMemcpyDefault);
 
-    auto stream = cudaStream_t{nullptr};
+    cudaStream_t stream {nullptr};
 
     // Allocate memory for temporary stuff
    IndexType *exsum_degree = nullptr;
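--

Notes (illustrative sketches appended for review; not part of the patch):

The new addv() added to graph_utils.cuh is an element-wise scalar add
(x[i] += val for i in [0, n)), built from the same thrust::transform pattern
as scal() but with thrust::plus in place of thrust::multiplies. A minimal
standalone equivalent on the default stream with plain Thrust (the in-tree
version runs through rmm::exec_policy; the _sketch name and the demo main()
below are assumptions for illustration only):

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/transform.h>

template <typename T>
void addv_sketch(size_t n, T val, T* x) {
  // x[i] += val: read x, pair each element with the constant val,
  // add, and write the sum back into x in place.
  thrust::transform(thrust::device_pointer_cast(x),
                    thrust::device_pointer_cast(x + n),
                    thrust::make_constant_iterator(val),
                    thrust::device_pointer_cast(x),
                    thrust::plus<T>());
}

int main() {
  thrust::device_vector<float> v(8, 1.0f);      // eight 1.0f values on the GPU
  addv_sketch(v.size(), 2.0f, v.data().get());  // every element becomes 3.0f
  return 0;
}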
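The renamed flag_leafs_kernel() keeps the usual grid-stride-loop shape: each
thread starts at its global index and strides by the total thread count,
flagging zero-degree (dangling) vertices in the bookmark array. A
self-contained demo of that kernel and its <<<nblocks, nthreads>>> launch
(the kernel body mirrors the patch; the host-side setup is an assumption,
not taken from the patch):

#include <cstdio>
#include <cuda_runtime.h>

template <typename IndexType, typename ValueType>
__global__ void flag_leafs_kernel(const size_t n, const IndexType *degree,
                                  ValueType *bookmark) {
  // Grid-stride loop: covers all n entries regardless of grid size.
  for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < n;
       i += gridDim.x * blockDim.x)
    if (degree[i] == 0)
      bookmark[i] = 1.0;  // mark dangling vertex
}

int main() {
  const size_t n = 5;
  int h_degree[n] = {2, 0, 1, 0, 3};
  int *d_degree;
  float *d_bookmark;
  cudaMalloc(&d_degree, n * sizeof(int));
  cudaMalloc(&d_bookmark, n * sizeof(float));
  cudaMemcpy(d_degree, h_degree, n * sizeof(int), cudaMemcpyHostToDevice);
  cudaMemset(d_bookmark, 0, n * sizeof(float));

  dim3 nthreads(256);
  dim3 nblocks((n + nthreads.x - 1) / nthreads.x);
  flag_leafs_kernel<<<nblocks, nthreads>>>(n, d_degree, d_bookmark);
  cudaDeviceSynchronize();

  float h_bookmark[n];
  cudaMemcpy(h_bookmark, d_bookmark, n * sizeof(float), cudaMemcpyDeviceToHost);
  for (size_t i = 0; i < n; ++i)
    printf("vertex %zu: %.1f\n", i, h_bookmark[i]);  // 1.0 at vertices 1 and 3
  cudaFree(d_degree);
  cudaFree(d_bookmark);
  return 0;
}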
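The ALLOC_MANAGED_TRY -> ALLOC_TRY changes in snmg/pagerank.cuh and
snmg/spmv.cuh move the PageRank bookmark/val/degree buffers and CUB's scratch
space from CUDA managed memory to ordinary device memory, as the macro names
suggest (the RMM macros add pooling and error handling on top and are not
reproduced here). In bare CUDA the difference amounts to:

#include <cuda_runtime.h>

int main() {
  const size_t n = 1 << 20;
  float *p_managed = nullptr;
  float *p_device = nullptr;

  // Managed: one pointer usable on host and device, migrated on demand.
  cudaMallocManaged(&p_managed, n * sizeof(float));
  // Device-only: GPU-resident, not host-dereferenceable, no migration cost.
  cudaMalloc(&p_device, n * sizeof(float));

  cudaFree(p_managed);
  cudaFree(p_device);
  return 0;
}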