forked from weft/warp
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrebase_yield.cu
61 lines (46 loc) · 1.56 KB
/
rebase_yield.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include <cuda.h>
#include <stdio.h>
#include "datadef.h"
#include "warp_device.cuh"
/**
 * \brief Rescales each entry of the yield array by 1/keff, rounding to an integer stochastically.
 * \details One thread handles one index `tid` in [0, N). Entries whose yield is
 * zero are left untouched (and their random number state is not advanced).
 * For the rest, yield/keff is truncated to an integer and then bumped up by one
 * when (truncated value + random sample) is still below the unrounded value.
 * The thread's random number state is read from rn_bank[tid] and written back
 * after the sample is drawn.
 *
 * Expected launch layout: 1D grid of 1D blocks with at least N threads in total.
 * No shared memory is used and no block-level barrier is required.
 *
 * NOTE(review): keff is used as a divisor and is not validated here; it is
 * presumably strictly positive - confirm against the caller.
 *
 * @param[in]     N           - number of entries in the yield / rn_bank arrays to process
 * @param[in]     keff        - divisor applied to every non-zero yield
 * @param[in,out] d_particles - device pointer; element 0 supplies the rn_bank and yield array pointers
 */
__global__ void rebase_yield_kernel(unsigned N, float keff, particle_data* d_particles ){

    // unsigned index: threadIdx/blockIdx/blockDim are unsigned, and so is N
    unsigned tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid >= N){return;}

    // Read the array pointers directly instead of staging them in shared memory.
    // The previous version had thread 0 fill shared pointers and then called
    // __syncthreads() *after* the out-of-range threads had already returned, so
    // the barrier was not reached by every thread of a partial tail block.
    // Every thread reads the same two words here, so no barrier is needed.
    unsigned* rn_bank = d_particles[0].rn_bank;
    unsigned* yield   = d_particles[0].yield;

    // single load of the yield; nothing to rebase when it is zero
    unsigned this_yield = yield[tid];
    if (this_yield == 0){return;}

    // local copy of this thread's random number state
    unsigned rn = rn_bank[tid];

    // rescale and split into integer part + remainder
    float    new_yield   = (float) this_yield / keff;
    unsigned i_new_yield = (unsigned) new_yield;

    // draw one sample; get_rand receives the address of the local state
    // (presumably returns a uniform value in [0,1) - see warp_device.cuh)
    float rn1 = get_rand(&rn);

    // stochastic rounding: round up when the sample falls below the fractional part
    if ((float) i_new_yield + rn1 < new_yield){
        this_yield = i_new_yield + 1;
    }
    else{
        this_yield = i_new_yield;
    }

    // write back the rebased yield and the random number state
    yield[tid]   = this_yield;
    rn_bank[tid] = rn;

}
/**
 * \brief Host wrapper: launches rebase_yield_kernel over N entries and waits for it to finish.
 * \details The grid is sized as ceil(N / NUM_THREADS) blocks of NUM_THREADS
 * threads on the default stream. When N is zero no kernel is launched (a
 * zero-block launch is an invalid configuration), but the device is still
 * synchronized so the blocking behavior seen by callers is unchanged. Launch
 * and execution errors are reported on stderr; the error state is only peeked
 * at, not cleared, so callers that inspect it afterwards still see it.
 *
 * @param[in] NUM_THREADS - the number of threads to run per thread block (must be non-zero)
 * @param[in] N - the number of entries to process; the grid is rounded up to whole blocks
 * @param[in] keff - k-effective of the current cycle
 * @param[in] d_particles - device pointer to particle data pointer array
 */
void rebase_yield( unsigned NUM_THREADS, unsigned N, float keff, particle_data* d_particles ){

    // a zero block size would divide by zero below and can never be launched
    if ( NUM_THREADS == 0 ){
        fprintf(stderr, "rebase_yield: NUM_THREADS must be non-zero\n");
        return;
    }

    if ( N > 0 ){

        // ceil-div written so that it cannot wrap for N close to UINT_MAX
        unsigned blks = N / NUM_THREADS + ( N % NUM_THREADS != 0 ? 1u : 0u );

        rebase_yield_kernel <<< blks, NUM_THREADS >>> ( N, keff, d_particles );

        // launch-configuration errors are not returned by the launch itself
        cudaError_t launch_err = cudaPeekAtLastError();
        if ( launch_err != cudaSuccess ){
            fprintf(stderr, "rebase_yield: kernel launch failed: %s\n", cudaGetErrorString(launch_err));
        }

    }

    // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement
    cudaError_t sync_err = cudaDeviceSynchronize();
    if ( sync_err != cudaSuccess ){
        fprintf(stderr, "rebase_yield: device synchronize failed: %s\n", cudaGetErrorString(sync_err));
    }

}