-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathkernel.cu
41 lines (30 loc) · 1.07 KB
/
kernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include "kernel.cuh"
#include <stdio.h>
// Element-wise integer vector addition: C[i] = A[i] + B[i] for 0 <= i < N.
//
// A, B : read-only inputs, dereferenced on the device for every i < N.
// C    : output, written on the device for every i < N.
// N    : element count; the loop body never runs when N <= 0.
//
// Only the x dimension of the grid and block is used for indexing. The
// kernel is a grid-stride loop, so any 1D launch configuration covers all N
// elements -- the grid does not have to hold ceil(N / blockDim.x) blocks.
__global__
void VecAdd(const int* A, const int* B, int* C, int N)
{
    // Index math is done in 64 bits: blockIdx.x * blockDim.x + threadIdx.x
    // can exceed INT_MAX in the last block when N is close to INT_MAX, and
    // narrowing that to int would yield a negative index that still passes
    // the `i < N` bounds check.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < N;
         i += stride)
    {
        C[i] = A[i] + B[i];
    }
}
// Element-wise integer vector subtraction: C[i] = A[i] - B[i] for 0 <= i < N.
//
// A, B : read-only inputs, dereferenced on the device for every i < N.
// C    : output, written on the device for every i < N.
// N    : element count; the loop body never runs when N <= 0.
//
// Only the x dimension of the grid and block is used for indexing. The
// kernel is a grid-stride loop, so any 1D launch configuration covers all N
// elements -- the grid does not have to hold ceil(N / blockDim.x) blocks.
__global__
void VecSub(const int* A, const int* B, int* C, int N)
{
    // Index math is done in 64 bits: blockIdx.x * blockDim.x + threadIdx.x
    // can exceed INT_MAX in the last block when N is close to INT_MAX, and
    // narrowing that to int would yield a negative index that still passes
    // the `i < N` bounds check.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < N;
         i += stride)
    {
        C[i] = A[i] - B[i];
    }
}
// Host-side launcher for the VecAdd kernel.
//
// d_A, d_B : input pointers forwarded unchanged to the kernel.
// d_C      : output pointer forwarded unchanged to the kernel.
// N        : number of elements to process; N <= 0 is a no-op.
//
// The kernel is launched with 256 threads per block and enough blocks to
// give every element its own thread. No stream argument is passed and this
// function does not synchronize, so the caller must synchronize (or issue a
// blocking copy) before reading d_C on the host.
void callVecAdd(const int* d_A, const int* d_B, int* d_C, int N){
    // A zero-block grid is an invalid launch configuration, so bail out
    // early when there is nothing to compute.
    if (N <= 0) {
        return;
    }

    const int threadsPerBlock = 256;
    // Ceiling division written as (N - 1) / t + 1 so it cannot overflow int
    // for N close to INT_MAX (N + t - 1 would).
    const int blocksPerGrid = (N - 1) / threadsPerBlock + 1;

    printf("Launching kernel: blocks %d, thread/block %d\n",
           blocksPerGrid, threadsPerBlock);
    VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);

    // Kernel launches return no status. Peek (rather than get) so that the
    // error is reported here but stays queryable by the caller.
    const cudaError_t launchStatus = cudaPeekAtLastError();
    if (launchStatus != cudaSuccess) {
        fprintf(stderr, "VecAdd launch failed: %s\n",
                cudaGetErrorString(launchStatus));
    }
}
// Host-side launcher for the VecSub kernel.
//
// d_A, d_B : input pointers forwarded unchanged to the kernel.
// d_C      : output pointer forwarded unchanged to the kernel.
// N        : number of elements to process; N <= 0 is a no-op.
//
// The kernel is launched with 256 threads per block and enough blocks to
// give every element its own thread. No stream argument is passed and this
// function does not synchronize, so the caller must synchronize (or issue a
// blocking copy) before reading d_C on the host.
void callVecSub(const int* d_A, const int* d_B, int* d_C, int N){
    // A zero-block grid is an invalid launch configuration, so bail out
    // early when there is nothing to compute.
    if (N <= 0) {
        return;
    }

    const int threadsPerBlock = 256;
    // Ceiling division written as (N - 1) / t + 1 so it cannot overflow int
    // for N close to INT_MAX (N + t - 1 would).
    const int blocksPerGrid = (N - 1) / threadsPerBlock + 1;

    printf("Launching kernel: blocks %d, thread/block %d\n",
           blocksPerGrid, threadsPerBlock);
    VecSub<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);

    // Kernel launches return no status. Peek (rather than get) so that the
    // error is reported here but stays queryable by the caller.
    const cudaError_t launchStatus = cudaPeekAtLastError();
    if (launchStatus != cudaSuccess) {
        fprintf(stderr, "VecSub launch failed: %s\n",
                cudaGetErrorString(launchStatus));
    }
}