-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlife32x32.cu
149 lines (116 loc) · 4.28 KB
/
life32x32.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <iostream>
#include <string>

#include <stdlib.h>
#include <unistd.h>
// Pause after each drawn frame. The value is handed to usleep(), so it is in
// microseconds. Overridden by the second command-line argument.
static int SLEEP_TIME = 50000;
// Number of generations simulated per kernel launch (and therefore per drawn
// frame). Overridden by the first command-line argument.
static int GENERATION_STEP = 1;
/**
 * Conway's rule for a single cell.
 *
 * @param alive     current state of the cell (0 or 1)
 * @param neighbors number of live cells among the 8 neighbours (0..8)
 * @return 2 if the cell is alive in the next generation, 0 otherwise. The
 *         value is pre-shifted into bit 1 so it can be OR-ed straight into the
 *         cell's byte next to the current state held in bit 0.
 */
__device__ __forceinline__ uint8_t lifeNextStateBit(uint8_t alive,
                                                    uint8_t neighbors) {
  // Birth on exactly 3 neighbours, survival on 2 or 3.
  return (neighbors == 3 || (alive && neighbors == 2)) ? 2 : 0;
}

/**
 * Advances a 32x32 toroidal Game of Life board by `numGenerations` steps.
 *
 * Launch layout: exactly one block of 32 threads (`<<<1, 32>>>`). Thread x
 * owns column x: it is the only thread that writes cells of that column, and
 * it reads the two adjacent columns (with wrap-around) to count neighbours.
 * Uses 1024 bytes of static shared memory, one byte per cell: bit 0 holds the
 * current state, bit 1 temporarily holds the next state.
 *
 * @param cols           32 words readable and writable from the device. Bit y
 *                       of cols[x] is the cell at column x, row y. Overwritten
 *                       in place with the final board.
 * @param numGenerations number of generations to simulate; the loop does not
 *                       run for values <= 0, leaving the board unchanged.
 */
__global__ void singleBlockLifeKernel(uint32_t *cols, int numGenerations) {
  __shared__ uint8_t grid[1024]; // TODO Should this be uint32_t?
  const int colIdx = threadIdx.x;

  // Unpack this thread's column from global memory into one byte per cell.
  uint32_t colData = cols[colIdx];
  for (int i = 0; i < 32; ++i) {
    grid[i * 32 + colIdx] = (colData >> i) & 1u;
  }
  // The neighbouring columns are unpacked by other threads; wait for them
  // before anyone starts counting neighbours.
  __syncthreads();

  // The bit mask is a cheap way of computing the non-negative value mod 32.
  const int leftIdx = (colIdx - 1) & 0x1f;
  const int rightIdx = (colIdx + 1) & 0x1f;

  for (int g = 0; g < numGenerations; ++g) {
    // Sliding three-row window, primed with row 31 (the wrap-around row above
    // row 0) and row 0 itself. "Sides" is the sum of the left and right cells,
    // "Middle" is the cell in this thread's own column.
    uint8_t lastSides =
        (grid[31 * 32 + leftIdx] & 1) + (grid[31 * 32 + rightIdx] & 1);
    uint8_t lastMiddle = grid[31 * 32 + colIdx] & 1;
    uint8_t thisSides = (grid[leftIdx] & 1) + (grid[rightIdx] & 1);
    uint8_t thisMiddle = grid[colIdx] & 1;

    for (int i = 0; i < 32; ++i) {
      // Row below the current one; row 31 wraps back to row 0.
      const int nextRow = ((i + 1) & 0x1f) * 32;
      // Mask with 1 everywhere: cells that were already processed this
      // generation carry their next state in bit 1.
      const uint8_t nextSides =
          (grid[nextRow + leftIdx] & 1) + (grid[nextRow + rightIdx] & 1);
      const uint8_t nextMiddle = grid[nextRow + colIdx] & 1;

      const uint8_t neighbors =
          lastSides + lastMiddle + thisSides + nextSides + nextMiddle;

      // Only bit 1 is written here, so bit 0 (the state other threads are
      // reading) stays untouched for the whole compute phase.
      grid[i * 32 + colIdx] |= lifeNextStateBit(thisMiddle, neighbors);

      // Slide the window down by one row.
      lastSides = thisSides;
      lastMiddle = thisMiddle;
      thisSides = nextSides;
      thisMiddle = nextMiddle;
    }

    // Every thread must be done reading the current state before any thread
    // replaces it.
    __syncthreads();
    // Commit: move the next state (bit 1) into the current state (bit 0).
    for (int i = 0; i < 32; ++i) {
      grid[i * 32 + colIdx] >>= 1;
    }
    // Every column must be committed before the next generation reads its
    // neighbours; without this a thread could see a stale neighbour cell.
    __syncthreads();
  }

  // Pack the column back into a single word. The cast keeps the shift into
  // bit 31 in unsigned arithmetic.
  colData = 0;
  for (int i = 0; i < 32; ++i) {
    colData |= static_cast<uint32_t>(grid[i * 32 + colIdx] & 1u) << i;
  }
  cols[colIdx] = colData;
}
/**
 * Fills the 32 column words of the board with a random pattern in which each
 * cell is alive with a probability of roughly 25%.
 *
 * The generator is re-seeded on every call from the system clock in
 * milliseconds, so two calls within the same millisecond give the same board.
 *
 * @param cols pointer to at least 32 host-writable words; cols[0..31] are
 *             overwritten.
 */
void generateGrid(uint32_t *&cols) {
  uint32_t seed = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();
  srand(seed);

  // rand() returns a non-negative int and only guarantees 15 random bits, so
  // a single call can never set bit 31 (and on some platforms not bits
  // 15..30 either). Assemble a full 32-bit word from four 8-bit slices, taken
  // from bits 7..14, which exist for every conforming RAND_MAX.
  auto rand32 = []() -> uint32_t {
    uint32_t word = 0;
    for (int shift = 0; shift < 32; shift += 8) {
      word |= static_cast<uint32_t>((rand() >> 7) & 0xFF) << shift;
    }
    return word;
  };

  for (int i = 0; i < 32; ++i) {
    // AND-ing two uniform words leaves each bit set with probability 1/4.
    cols[i] = rand32() & rand32();
  }
}
/**
 * Draws the board on the terminal, prints the generation counter below it and
 * then pauses for SLEEP_TIME microseconds.
 *
 * @param col        32 host-readable column words; bit y of col[x] is the
 *                   cell at column x, row y.
 * @param generation number printed underneath the board.
 */
void drawGrid(uint32_t *col, int generation) {
  // Move the cursor to the top-left corner so each frame overwrites the last.
  printf("\033[H");
  for (int y = 0; y < 32; ++y) {
    for (int x = 0; x < 32; ++x) {
      // Shift the word instead of a `long` literal so the test does not
      // depend on the width of long. Both glyphs are two columns wide to keep
      // the rows aligned.
      fputs(((col[x] >> y) & 1u) ? "██" : "  ", stdout);
    }
    putchar('\n');
  }
  printf("%d ", generation);
  // The counter line has no trailing newline; flush so it is visible during
  // the pause instead of showing up with the next frame.
  fflush(stdout);
  // A negative value would be converted to a huge unsigned delay.
  if (SLEEP_TIME > 0)
    usleep(SLEEP_TIME);
}
/**
 * Runs the Game of Life animation until interrupted or until a CUDA call
 * fails.
 *
 * Usage: life32x32 [generations_per_frame] [sleep_microseconds]
 *
 * @return 1 on invalid arguments or on a CUDA error; the animation loop has
 *         no other exit.
 */
int main(int argc, char **argv) {
  const char *prog = (argc > 0 && argv[0]) ? argv[0] : "life32x32";

  // std::stoi throws on non-numeric or out-of-range input; report it as a
  // usage error instead of terminating with an uncaught exception.
  try {
    if (argc > 1)
      GENERATION_STEP = std::stoi(argv[1]);
    if (argc > 2)
      SLEEP_TIME = std::stoi(argv[2]);
  } catch (const std::exception &) {
    fprintf(stderr, "usage: %s [generations_per_frame] [sleep_microseconds]\n",
            prog);
    return 1;
  }
  if (GENERATION_STEP < 1 || SLEEP_TIME < 0) {
    fprintf(stderr,
            "%s: generations_per_frame must be >= 1 and sleep_microseconds "
            "must be >= 0\n",
            prog);
    return 1;
  }

  uint32_t *cols = nullptr;
  uint32_t generation = 0;

  // Managed memory: the same pointer is written by generateGrid on the host,
  // updated by the kernel and read by drawGrid on the host.
  cudaError_t err = cudaMallocManaged(&cols, sizeof(uint32_t) * 32);
  if (err != cudaSuccess) {
    fprintf(stderr, "%s: cudaMallocManaged failed: %s\n", prog,
            cudaGetErrorString(err));
    return 1;
  }

  generateGrid(cols);
  drawGrid(cols, generation);

  while (true) {
    singleBlockLifeKernel<<<1, 32>>>(cols, GENERATION_STEP);
    // Launch-configuration errors are reported by cudaGetLastError, faults
    // during execution by the synchronize call. The synchronize is also what
    // makes it safe for the host to read `cols` afterwards.
    err = cudaGetLastError();
    if (err == cudaSuccess)
      err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
      break;

    generation += GENERATION_STEP;
    drawGrid(cols, generation);
  }

  fprintf(stderr, "\n%s: CUDA error: %s\n", prog, cudaGetErrorString(err));
  cudaFree(cols);
  return 1;
}