-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.cu
349 lines (263 loc) · 9.03 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
//
// main.cu
// CudaRasterizer
//
// Created by Robert Crosby on 4/21/12.
// Copyright (c) 2012 In-Con. All rights reserved.
//
#include <stdio.h>
#include <string.h>
#ifdef __APPLE__
#include <CUDA/CUDA.h>
#endif
#ifdef __unix__
#include <cuda.h>
#endif
#include "structures.h"
#include "mesh_loader.h"
#include "rasterizer.h"
#include "blur_filter.h"
#include "png_loader.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include "hrt.h"
/* Size of the scratch buffer used when formatting a single timing value. */
#define STR_SIZE 24

/* Slots of the times[] / timeNames[] tables, one per profiled stage. */
#define SETUP_INDEX 0
#define NORMALS_INDEX 1
#define TRANSLATE_INDEX 2
#define LIGHT_INDEX 3
#define BUFFER_INDEX 4
#define RASTERIZE_INDEX 5
#define BLUR_INDEX 6
#define CLEANUP_INDEX 7
#define TOTAL_INDEX 8
/* Number of slots, including the trailing "total" slot. */
#define TIME_COUNT 9

/* NOTE(review): not referenced anywhere in this file; kept in case other
 * translation units or future code rely on it. */
#define SCALE_TO_SCREEN 0.8
/* Default output width and height in pixels. */
#define DEF_SIZE 1000

/* Defaults used when -i / -o are not given on the command line. */
static const char *DEF_MESH = "monkey_high.m";
static const char *DEF_IMAGE = "test.png";

/* Human-readable label for every slot of times[]; order matches the *_INDEX
 * macros above. Declared as pointers to const because the entries are string
 * literals, which must not be bound to a plain `char *` in C++. */
static const char *timeNames[TIME_COUNT] = {"setup", "calculate normals",
    "translations", "light vertices", "create buffers", "rasterize mesh",
    "blur image", "cleanup", "total"};

/* Duration recorded for each stage by the most recent render call. */
static uint64_t times[TIME_COUNT];
/*
 * Prints every recorded stage duration, one per line, in the form
 * "<stage name>: <value>ns", followed by the sum of all stages.
 *
 * Side effect: stores the computed sum in times[TOTAL_INDEX].
 */
void printTimes() {
    /* `register` was dropped here: the keyword was removed in C++17 and this
     * file is compiled as C++ by nvcc. */
    uint64_t total = 0;
    int ndx;

    /* Every slot except the last one holds an individual stage. */
    for (ndx = 0; ndx < TOTAL_INDEX; ++ndx) {
        printf("%s: %" PRIu64 "ns\n", timeNames[ndx], times[ndx]);
        total += times[ndx];
    }

    /* Address the total slot by name instead of relying on the value the
     * loop index happens to have after the loop. */
    times[TOTAL_INDEX] = total;
    printf("%s: %" PRIu64 "ns\n", timeNames[TOTAL_INDEX], times[TOTAL_INDEX]);
}
/*
 * Renders a mesh file to a PNG image using the host (non-CUDA) code path and
 * records the duration of each stage in times[].
 *
 * imageFile   path of the PNG to write
 * meshFile    path of the mesh to load
 * width       output width in pixels, must be positive
 * height      output height in pixels, must be positive
 * duplicates  forwarded unchanged to rasterize_mesh()
 * blur_iter   forwarded unchanged to blur_bitmap()
 *
 * Returns 0 on success and 1 when the dimensions are not positive, the loaded
 * mesh has no triangles, or the host draw buffers cannot be allocated.
 */
int render_mesh(const char *imageFile, const char *meshFile, int width, int height, int duplicates, int blur_iter) {
    mesh_t mesh = {NULL, NULL, 0, NULL, NULL, 0, NULL, 0, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};
    vec3_t center;
    mat4_t modelMtx;
    float scale;
    vec3_t lightDir = {-1.0, 1.0, 1.0};
    vec3_t lightColor = {0.7, 0.7, 0.7};
    drawbuffer_t buffers;
    bitmap_t bitmap;
    size_t pixelCount;

    /* A non-positive size would turn into a huge or zero allocation below. */
    if (width <= 0 || height <= 0) {
        fprintf(stderr, "invalid image size %dx%d\n", width, height);
        return 1;
    }
    /* Widen before multiplying so the pixel count cannot overflow int. */
    pixelCount = (size_t) width * (size_t) height;

    // load the mesh
    load_m_mesh(&mesh, meshFile);
    if (mesh.triangleCount == 0)
        return 1;

    /* The host path has no separate setup stage. */
    times[SETUP_INDEX] = 0;

    hrt_start();
    mesh_set_normals(&mesh);
    hrt_stop();
    times[NORMALS_INDEX] = hrt_result();

    // create the transforms and apply to mesh
    /* center = -(high + low) / 2, i.e. the translation that moves the middle
     * of the mesh bounds to the origin. */
    center = vec3_add(&mesh.high, &mesh.low);
    center = vec3_scale(&center, -0.5);
    /* NOTE(review): a mesh with zero extent along x makes this division
     * produce infinity; confirm whether such meshes can occur. */
    scale = mesh.high.x - mesh.low.x;
    scale = 1 / scale;
    modelMtx = mat4_translation(center.x, center.y, center.z);
    mat4_scale3f(&modelMtx, scale, -scale, scale);
    mat4_translate3f(&modelMtx, 0.5f, 0.5f, 1.0f);

    hrt_start();
    mesh_translate_locations(&mesh, &modelMtx);
    hrt_stop();
    times[TRANSLATE_INDEX] = hrt_result();

    hrt_start();
    // light the vertices
    mesh_light_directional(&mesh, &lightDir, &lightColor);
    hrt_stop();
    times[LIGHT_INDEX] = hrt_result();

    hrt_start();
    // create the color and z buffers
    buffers.width = width;
    buffers.height = height;
    buffers.colorBuffer = (color_t *) malloc(pixelCount * sizeof(color_t));
    buffers.zBuffer = (float *) malloc(pixelCount * sizeof(float));
    hrt_stop();
    times[BUFFER_INDEX] = hrt_result();

    if (buffers.colorBuffer == NULL || buffers.zBuffer == NULL) {
        fprintf(stderr, "error creating host draw buffers\n");
        free(buffers.colorBuffer);
        free(buffers.zBuffer);
        free(mesh.vertices);
        free(mesh.triangles);
        return 1;
    }

    hrt_start();
    // draw the mesh
    rasterize_mesh(&buffers, &mesh, duplicates);
    hrt_stop();
    times[RASTERIZE_INDEX] = hrt_result();

    /* The geometry and the depth buffer are not used after rasterization. */
    free(mesh.vertices);
    free(mesh.triangles);
    free(buffers.zBuffer);

    hrt_start();
    // create a bit map that shares the color buffer's pixels
    bitmap.width = buffers.width;
    bitmap.height = buffers.height;
    bitmap.pixels = buffers.colorBuffer;
    // blur the bit map
    blur_bitmap(&bitmap, blur_iter);
    hrt_stop();
    times[BLUR_INDEX] = hrt_result();

    /* Cleanup is not timed on the host path. */
    times[CLEANUP_INDEX] = 0;

    // write to file
    save_png_to_file(&bitmap, imageFile);

    free(buffers.colorBuffer);
    return 0;
}
/*
 * Allocates `size` bytes of device memory into *devPtr.
 * On any CUDA error prints "error creating <what>" and returns 0; returns 1
 * on success.
 */
static int rmc_device_alloc(void **devPtr, size_t size, const char *what) {
    if (cudaMalloc(devPtr, size) != cudaSuccess) {
        printf("error creating %s\n", what);
        return 0;
    }
    return 1;
}

/*
 * Frees every device allocation owned by the mesh and the draw buffers and
 * resets the pointers, so calling it a second time is harmless
 * (cudaFree ignores null pointers).
 */
static void rmc_release_device(mesh_t *mesh, drawbuffer_t *buffers) {
    cudaFree(buffers->d_colorBuffer);
    buffers->d_colorBuffer = NULL;
    cudaFree(buffers->d_zBuffer);
    buffers->d_zBuffer = NULL;
    cudaFree(buffers->d_locks);
    buffers->d_locks = NULL;
    cudaFree(mesh->d_polygons);
    mesh->d_polygons = NULL;
    cudaFree(mesh->d_vertices);
    mesh->d_vertices = NULL;
    cudaFree(mesh->d_triangles);
    mesh->d_triangles = NULL;
}

/*
 * Renders a mesh file to a PNG image using the CUDA code path and records the
 * duration of each stage in times[].
 *
 * imageFile   path of the PNG to write
 * meshFile    path of the mesh to load
 * width       output width in pixels, must be positive
 * height      output height in pixels, must be positive
 * duplicates  forwarded unchanged to rasterize_mesh_cuda()
 * blur_iter   forwarded unchanged to blur_bitmap_cuda()
 *
 * Returns 0 on success and 1 when the dimensions are not positive, the loaded
 * mesh has no triangles, or an allocation or host/device copy fails. All
 * device allocations made here, including the lock buffer, are released
 * before returning.
 */
int render_mesh_cuda(const char *imageFile, const char *meshFile, int width, int height, int duplicates, int blur_iter) {
    mesh_t mesh = {NULL, NULL, 0, NULL, NULL, 0, NULL, 0, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};
    size_t size;
    size_t pixelCount;
    vec3_t center;
    mat4_t modelMtx;
    float scale;
    vec3_t lightDir = {-1.0, 1.0, 1.0};
    vec3_t lightColor = {0.7, 0.7, 0.7};
    drawbuffer_t buffers;
    bitmap_t bitmap;
    int ok;
    int status = 1;

    /* A non-positive size would turn into a huge or zero allocation below. */
    if (width <= 0 || height <= 0) {
        printf("invalid image size %dx%d\n", width, height);
        return 1;
    }
    /* Widen before multiplying so the pixel count cannot overflow int. */
    pixelCount = (size_t) width * (size_t) height;

    // load the mesh
    load_m_mesh(&mesh, meshFile);
    if (mesh.triangleCount == 0)
        return 1;

    /* Start from null pointers so the shared cleanup below can always run. */
    mesh.d_vertices = NULL;
    mesh.d_triangles = NULL;
    mesh.d_polygons = NULL;
    buffers.colorBuffer = NULL;
    buffers.d_colorBuffer = NULL;
    buffers.d_zBuffer = NULL;
    buffers.d_locks = NULL;

    hrt_start();
    // upload the vertices; the host copy is released afterwards
    size = mesh.vertexCount * sizeof(vertex_t);
    ok = rmc_device_alloc((void **) &mesh.d_vertices, size, "memory for vertices");
    if (ok && cudaMemcpy(mesh.d_vertices, mesh.vertices, size, cudaMemcpyHostToDevice) != cudaSuccess) {
        printf("error copying vertices to device\n");
        ok = 0;
    }
    free(mesh.vertices);

    // upload the triangles; the host copy is released afterwards
    size = mesh.triangleCount * sizeof(ivec3_t);
    ok = ok && rmc_device_alloc((void **) &mesh.d_triangles, size, "memory for triangles");
    if (ok && cudaMemcpy(mesh.d_triangles, mesh.triangles, size, cudaMemcpyHostToDevice) != cudaSuccess) {
        printf("error copying triangles to device\n");
        ok = 0;
    }
    free(mesh.triangles);

    // allocate the polygons, one per triangle
    mesh.polygonCount = mesh.triangleCount;
    size = mesh.polygonCount * sizeof(polygon_t);
    ok = ok && rmc_device_alloc((void **) &mesh.d_polygons, size, "memory for polygons");
    hrt_stop();
    times[SETUP_INDEX] = hrt_result();
    if (!ok)
        goto cleanup;

    hrt_start();
    // set the normals of the vertices
    mesh_set_normals_cuda(&mesh);
    // create the transforms and apply to mesh
    /* center = -(high + low) / 2, i.e. the translation that moves the middle
     * of the mesh bounds to the origin. */
    center = vec3_add(&mesh.high, &mesh.low);
    center = vec3_scale(&center, -0.5);
    /* NOTE(review): a mesh with zero extent along x makes this division
     * produce infinity; confirm whether such meshes can occur. */
    scale = mesh.high.x - mesh.low.x;
    scale = 1 / scale;
    hrt_stop();
    times[NORMALS_INDEX] = hrt_result();

    hrt_start();
    modelMtx = mat4_translation(center.x, center.y, center.z);
    mat4_scale3f(&modelMtx, scale, -scale, scale);
    mat4_translate3f(&modelMtx, 0.5f, 0.5f, 1.0);
    mesh_translate_locations_cuda(&mesh, &modelMtx);
    hrt_stop();
    times[TRANSLATE_INDEX] = hrt_result();

    hrt_start();
    // light the vertices
    mesh_light_directional_cuda(&mesh, &lightDir, &lightColor);
    hrt_stop();
    times[LIGHT_INDEX] = hrt_result();

    hrt_start();
    // create the buffers
    buffers.width = width;
    buffers.height = height;
    // create a color buffer on the device
    /* NOTE(review): sized with sizeof(int) although the host buffer is a
     * color_t array; presumably sizeof(color_t) == sizeof(int) - confirm. */
    size = pixelCount * sizeof(int);
    ok = rmc_device_alloc((void **) &buffers.d_colorBuffer, size, "color buffer");
    // create a depth buffer on the device
    size = pixelCount * sizeof(float);
    ok = ok && rmc_device_alloc((void **) &buffers.d_zBuffer, size, "depth buffer");
    // create a lock buffer on the device
    size = pixelCount * sizeof(int);
    ok = ok && rmc_device_alloc((void **) &buffers.d_locks, size, "lock buffer");
    // clear the buffers
    if (ok)
        clear_buffers_cuda(&buffers);
    hrt_stop();
    times[BUFFER_INDEX] = hrt_result();
    if (!ok)
        goto cleanup;

    hrt_start();
    // rasterize the polygons
    rasterize_mesh_cuda(&buffers, &mesh, duplicates);
    hrt_stop();
    times[RASTERIZE_INDEX] = hrt_result();

    hrt_start();
    // copy the color buffer to host
    size = pixelCount * sizeof(int);
    buffers.colorBuffer = (color_t *) malloc(size);
    if (buffers.colorBuffer == NULL) {
        printf("error creating host color buffer\n");
        ok = 0;
    } else if (cudaMemcpy(buffers.colorBuffer, buffers.d_colorBuffer, size, cudaMemcpyDeviceToHost) != cudaSuccess) {
        /* A blocking copy also reports failures of the work queued before it. */
        printf("error copying color buffer from device\n");
        ok = 0;
    }
    // free the buffers, polygons, vertices, and triangles on the device
    rmc_release_device(&mesh, &buffers);
    hrt_stop();
    times[CLEANUP_INDEX] = hrt_result();
    if (!ok)
        goto cleanup;

    hrt_start();
    // put together the bit map; it shares the host color buffer's pixels
    bitmap.width = buffers.width;
    bitmap.height = buffers.height;
    bitmap.pixels = buffers.colorBuffer;
    // blur the bit map
    blur_bitmap_cuda(&bitmap, blur_iter);
    hrt_stop();
    times[BLUR_INDEX] = hrt_result();

    // write the bitmap to a file
    save_png_to_file(&bitmap, imageFile);
    status = 0;

cleanup:
    /* Reached on success and on every failure after the mesh was loaded. */
    rmc_release_device(&mesh, &buffers);
    // free the host color buffer
    free(buffers.colorBuffer);
    return status;
}
/*
 * Command-line entry point.
 *
 * Recognised options (an argument matches when it CONTAINS the flag text,
 * which keeps spellings such as "--cuda" working as before):
 *   -i <file>   mesh to load            (default DEF_MESH)
 *   -o <file>   PNG to write            (default DEF_IMAGE)
 *   -w <int>    image width             (default DEF_SIZE)
 *   -h <int>    image height            (default DEF_SIZE)
 *   -n <int>    value passed as `duplicates` to the renderer (default 1)
 *   -blur <int> value passed as `blur_iter` to the renderer  (default 1)
 *   -cuda       use render_mesh_cuda() instead of render_mesh()
 *   -t          print the recorded stage times after a successful render
 * Unrecognised arguments are ignored.
 *
 * Returns EXIT_SUCCESS when the render succeeds, EXIT_FAILURE otherwise.
 */
int main(int argc, const char * argv[])
{
    const char *meshFile = DEF_MESH;
    const char *imageFile = DEF_IMAGE;
    int i, width, height, useCuda = 0, duplicates = 1, blur = 1, profile = 0;
    int status;

    width = height = DEF_SIZE;

    /* Start at 1: argv[0] is the program path and must not be parsed as an
     * option (a path containing "-t" or "-i" would otherwise match). */
    for (i = 1; i < argc; ++i) {
        const char *flag = argv[i];
        const char **textTarget = NULL;
        int *numberTarget = NULL;

        /* The order of these checks is the original precedence. */
        if (strstr(flag, "-i") != NULL)
            textTarget = &meshFile;
        else if (strstr(flag, "-o") != NULL)
            textTarget = &imageFile;
        else if (strstr(flag, "-w") != NULL)
            numberTarget = &width;
        else if (strstr(flag, "-h") != NULL)
            numberTarget = &height;
        else if (strstr(flag, "-cuda") != NULL) {
            useCuda = 1;
            continue;
        }
        else if (strstr(flag, "-n") != NULL)
            numberTarget = &duplicates;
        else if (strstr(flag, "-blur") != NULL)
            numberTarget = &blur;
        else if (strstr(flag, "-t") != NULL) {
            profile = 1;
            continue;
        }
        else
            continue;

        /* Every option that reaches this point consumes the next argument.
         * Stop instead of reading argv[argc], which is a null pointer. */
        if (++i >= argc) {
            fprintf(stderr, "missing value after %s\n", flag);
            break;
        }

        if (textTarget != NULL)
            *textTarget = argv[i];
        else if (sscanf(argv[i], "%d", numberTarget) != 1)
            /* The previous value (the default) is kept. */
            fprintf(stderr, "ignoring non-numeric value '%s' for %s\n", argv[i], flag);
    }

    if (width <= 0 || height <= 0) {
        fprintf(stderr, "image width and height must be positive (got %dx%d)\n", width, height);
        return EXIT_FAILURE;
    }

    if (useCuda)
        status = render_mesh_cuda(imageFile, meshFile, width, height, duplicates, blur);
    else
        status = render_mesh(imageFile, meshFile, width, height, duplicates, blur);

    if (status != 0)
        return EXIT_FAILURE;

    if (profile) printTimes();

    return EXIT_SUCCESS;
}