-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathcuda_helper.h
58 lines (47 loc) · 2.21 KB
/
cuda_helper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#ifndef _GNN_CUDA_HELPER_H_
#define _GNN_CUDA_HELPER_H_
#include "legion.h"
#include <cudnn.h>
// Report a fatal error and terminate the process.
// Writes `s`, a newline, the "<file>:<line>" of the expansion site and
// "Aborting..." to std::cerr, then calls exit(1).
// `s` must be something a std::string can be constructed from.
// The do { } while(0) wrapper makes the expansion behave as one statement.
// (A previously unused second stringstream has been dropped.)
#define FatalError(s) do { \
  std::stringstream _message; \
  _message << std::string(s) + "\n" << __FILE__ << ':' << __LINE__; \
  std::cerr << _message.str() << "\nAborting...\n"; \
  exit(1); \
} while(0)
// Abort through FatalError when a cuDNN call does not return
// CUDNN_STATUS_SUCCESS; the message is "CUDNN failure: " followed by
// cudnnGetErrorString() of the returned status.
// The argument is evaluated exactly once and stored, so passing the call
// itself, e.g. checkCUDNN(cudnnFoo(...)), never re-runs the call while the
// error message is being built.
#define checkCUDNN(status) do { \
  const cudnnStatus_t _cudnn_status = (status); \
  if (_cudnn_status != CUDNN_STATUS_SUCCESS) { \
    std::stringstream _error; \
    _error << "CUDNN failure: " << cudnnGetErrorString(_cudnn_status); \
    FatalError(_error.str()); \
  } \
} while(0)
// Abort through FatalError when a CUDA call returns a non-zero status; the
// message is "Cuda failure: " followed by the numeric status code.
// The argument is evaluated exactly once and kept as an int, so passing the
// call itself, e.g. checkCUDA(cudaFoo(...)), never re-runs the call while the
// error message is being built.
#define checkCUDA(status) do { \
  const int _cuda_status = static_cast<int>(status); \
  if (_cuda_status != 0) { \
    std::stringstream _error; \
    _error << "Cuda failure: " << _cuda_status; \
    FatalError(_error.str()); \
  } \
} while(0)
// CUDA grid-stride loop header. Declares `int i`, starts it at this thread's
// global x index (blockIdx.x * blockDim.x + threadIdx.x) and advances it by
// the total number of threads in the grid along x until it reaches `n`.
// Use as:  CUDA_KERNEL_LOOP(i, n) { ...body using i... }
// NOTE(review): `i` is an int, so `n` is assumed to fit in an int.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = threadIdx.x + blockDim.x * blockIdx.x; \
       i < (n); \
       i += gridDim.x * blockDim.x)
// Threads per block that GET_BLOCKS divides by when sizing a launch.
// NOTE(review): this comment used to say 1024 threads per block (which
// requires cuda sm_2x or above), but the value is 512 — confirm which one
// is intended.
const int CUDA_NUM_THREADS = 512;
// Upper bound GET_BLOCKS applies to the block count it returns, i.e. a cap
// on the number of blocks (despite the "BLOCK_SIZE" in the name).
const int BLOCK_SIZE_LIMIT = 32768;
using namespace Legion;
// CUDA: number of blocks needed to cover N elements.
// Returns N / CUDA_NUM_THREADS rounded up, capped at BLOCK_SIZE_LIMIT.
// The rounding-up sum is computed in 64-bit so that N within
// CUDA_NUM_THREADS - 1 of INT_MAX cannot overflow int (the int sum was
// undefined behaviour there and could yield a negative block count).
// For N <= 0 the result is <= 0, exactly as before; callers should not
// launch a kernel with such a value.
inline int GET_BLOCKS(const int N)
{
  const long long blocks =
      (static_cast<long long>(N) + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
  return (blocks > BLOCK_SIZE_LIMIT) ? BLOCK_SIZE_LIMIT
                                     : static_cast<int>(blocks);
}
// Forward declarations of __global__ kernels operating on float buffers.
// Their definitions are not in this header, so the per-kernel notes below are
// inferred from names and signatures only.

// NOTE(review): presumably rescales the `size` floats at `ptr` using `a` and
// `b` — confirm the exact formula against the definition.
__global__
void scale_kernel(float* ptr, coord_t size, float a, float b);
// NOTE(review): presumably sets each of the `size` floats at `ptr` to
// `value` — confirm against the definition.
__global__
void assign_kernel(float* ptr, coord_t size, float value);
// NOTE(review): presumably copies `size` floats from `src` to `dst` —
// confirm against the definition.
__global__
void copy_kernel(float* dst, const float* src, coord_t size);
// NOTE(review): presumably accumulates `src` into `dst` element-wise over
// `size` floats — confirm against the definition.
__global__
void add_kernel(float* dst, const float* src, coord_t size);
#endif