rev |
line source |
nuclear@0
|
1 #include <stdio.h>
|
nuclear@8
|
2 #include <string.h>
|
nuclear@2
|
3 #include <math.h>
|
nuclear@0
|
4 #include <assert.h>
|
nuclear@47
|
5 #include "rt.h"
|
John@14
|
6 #include "ogl.h"
|
nuclear@0
|
7 #include "ocl.h"
|
nuclear@22
|
8 #include "scene.h"
|
nuclear@32
|
9 #include "timer.h"
|
nuclear@45
|
10 #include "common.h"
|
nuclear@0
|
11
|
nuclear@12
|
/* Indices of the kernel arguments, in the order in which init_renderer()
 * binds them to the OpenCL program.
 */
enum {
	KARG_FRAMEBUFFER = 0,	/* output image (GL texture or plain CL image) */
	KARG_RENDER_INFO,	/* copy of the RendInfo structure */
	KARG_FACES,		/* face buffer of the scene */
	KARG_MATLIB,		/* material array of the scene */
	KARG_LIGHTS,		/* light array of the scene */
	KARG_PRIM_RAYS,		/* precomputed primary rays, one per pixel */
	KARG_XFORM,		/* 16 floats, written by set_xform() */
	KARG_INVTRANS_XFORM,	/* 16 floats, written by set_xform() */
	KARG_KDTREE,		/* kd-tree packed into an image */

	NUM_KERNEL_ARGS		/* number of arguments; must stay last */
};
|
John@11
|
26
|
nuclear@47
|
27 static void update_render_info();
|
nuclear@3
|
28 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
|
nuclear@43
|
29 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
|
nuclear@3
|
30
|
nuclear@13
|
31 static Face *faces;
|
nuclear@3
|
32 static Ray *prim_rays;
|
nuclear@3
|
33 static CLProgram *prog;
|
nuclear@3
|
34 static int global_size;
|
nuclear@3
|
35
|
nuclear@7
|
36
|
nuclear@4
|
37 static RendInfo rinf;
|
nuclear@55
|
38 static RenderStats rstat;
|
nuclear@47
|
39 static int saved_iter_val;
|
nuclear@4
|
40
|
nuclear@43
|
41 static long timing_sample_sum;
|
nuclear@43
|
42 static long num_timing_samples;
|
nuclear@43
|
43
|
John@50
|
44 extern bool dbg_frame_time;
|
John@50
|
45
|
nuclear@4
|
46
|
nuclear@39
|
47 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
|
nuclear@0
|
48 {
|
nuclear@4
|
49 // render info
|
nuclear@22
|
50 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0;
|
nuclear@16
|
51 rinf.ambient[3] = 0.0;
|
nuclear@16
|
52
|
nuclear@4
|
53 rinf.xsz = xsz;
|
nuclear@4
|
54 rinf.ysz = ysz;
|
nuclear@13
|
55 rinf.num_faces = scn->get_num_faces();
|
nuclear@54
|
56 rinf.num_lights = scn->get_num_lights();
|
nuclear@47
|
57 rinf.max_iter = saved_iter_val = 6;
|
nuclear@47
|
58 rinf.cast_shadows = true;
|
nuclear@4
|
59
|
nuclear@3
|
60 /* calculate primary rays */
|
nuclear@3
|
61 prim_rays = new Ray[xsz * ysz];
|
nuclear@2
|
62
|
nuclear@2
|
63 for(int i=0; i<ysz; i++) {
|
nuclear@2
|
64 for(int j=0; j<xsz; j++) {
|
nuclear@2
|
65 prim_rays[i * xsz + j] = get_primary_ray(j, i, xsz, ysz, 45.0);
|
nuclear@2
|
66 }
|
nuclear@0
|
67 }
|
nuclear@54
|
68 dbg_set_primary_rays(prim_rays); // give them to the debug renderer
|
nuclear@0
|
69
|
nuclear@2
|
70 /* setup opencl */
|
nuclear@3
|
71 prog = new CLProgram("render");
|
nuclear@54
|
72 if(!prog->load("src/rt.cl")) {
|
nuclear@8
|
73 return false;
|
nuclear@0
|
74 }
|
nuclear@0
|
75
|
nuclear@24
|
76 if(!(faces = (Face*)scn->get_face_buffer())) {
|
nuclear@13
|
77 fprintf(stderr, "failed to create face buffer\n");
|
nuclear@13
|
78 return false;
|
nuclear@13
|
79 }
|
nuclear@13
|
80
|
nuclear@28
|
81 const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
|
nuclear@28
|
82 if(!kdbuf) {
|
nuclear@28
|
83 fprintf(stderr, "failed to create kdtree buffer\n");
|
nuclear@28
|
84 return false;
|
nuclear@28
|
85 }
|
nuclear@43
|
86
|
nuclear@43
|
87 int kdimg_xsz, kdimg_ysz;
|
nuclear@43
|
88 float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
|
nuclear@28
|
89
|
nuclear@3
|
90 /* setup argument buffers */
|
nuclear@41
|
91 #ifdef CLGL_INTEROP
|
nuclear@39
|
92 prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex);
|
nuclear@41
|
93 #else
|
nuclear@41
|
94 prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz);
|
nuclear@41
|
95 #endif
|
nuclear@12
|
96 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
|
John@14
|
97 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces);
|
John@14
|
98 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials());
|
nuclear@54
|
99 prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, scn->get_num_lights() * sizeof(Light), scn->get_lights());
|
nuclear@12
|
100 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
|
nuclear@12
|
101 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
|
nuclear@12
|
102 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
|
nuclear@43
|
103 //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
|
nuclear@43
|
104 prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
|
nuclear@43
|
105
|
nuclear@43
|
106 delete [] kdimg_pixels;
|
nuclear@43
|
107
|
nuclear@12
|
108
|
John@14
|
109 if(prog->get_num_args() < NUM_KERNEL_ARGS) {
|
John@14
|
110 return false;
|
John@14
|
111 }
|
John@14
|
112
|
nuclear@45
|
113 const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
|
nuclear@45
|
114 if(!prog->build(opt)) {
|
nuclear@16
|
115 return false;
|
nuclear@16
|
116 }
|
nuclear@16
|
117
|
nuclear@54
|
118 //delete [] prim_rays; now dbg_renderer handles them
|
nuclear@2
|
119
|
nuclear@3
|
120 global_size = xsz * ysz;
|
nuclear@54
|
121
|
nuclear@54
|
122
|
nuclear@54
|
123 init_dbg_renderer(xsz, ysz, scn, tex);
|
nuclear@3
|
124 return true;
|
nuclear@3
|
125 }
|
nuclear@3
|
126
|
nuclear@3
|
127 void destroy_renderer()
|
nuclear@3
|
128 {
|
nuclear@3
|
129 delete prog;
|
nuclear@43
|
130
|
nuclear@54
|
131 destroy_dbg_renderer();
|
nuclear@54
|
132
|
nuclear@54
|
133 if(num_timing_samples) {
|
nuclear@54
|
134 printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);
|
nuclear@54
|
135 }
|
nuclear@3
|
136 }
|
nuclear@3
|
137
|
nuclear@3
|
138 bool render()
|
nuclear@3
|
139 {
|
nuclear@39
|
140 // XXX do we need to call glFinish ?
|
nuclear@39
|
141
|
nuclear@32
|
142 long tm0 = get_msec();
|
nuclear@32
|
143
|
nuclear@40
|
144 #ifdef CLGL_INTEROP
|
nuclear@39
|
145 cl_event ev;
|
nuclear@39
|
146 CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
|
nuclear@39
|
147
|
nuclear@39
|
148 if(!acquire_gl_object(texbuf, &ev)) {
|
nuclear@39
|
149 return false;
|
nuclear@39
|
150 }
|
nuclear@39
|
151
|
nuclear@39
|
152 // make sure that we will wait for the acquire to finish before running
|
nuclear@39
|
153 prog->set_wait_event(ev);
|
nuclear@40
|
154 #endif
|
nuclear@39
|
155
|
nuclear@3
|
156 if(!prog->run(1, global_size)) {
|
nuclear@3
|
157 return false;
|
nuclear@0
|
158 }
|
John@15
|
159
|
nuclear@40
|
160 #ifdef CLGL_INTEROP
|
nuclear@39
|
161 if(!release_gl_object(texbuf, &ev)) {
|
nuclear@39
|
162 return false;
|
nuclear@39
|
163 }
|
nuclear@39
|
164 clWaitForEvents(1, &ev);
|
nuclear@40
|
165 #endif
|
nuclear@39
|
166
|
nuclear@40
|
167 #ifndef CLGL_INTEROP
|
nuclear@40
|
168 /* if we don't compile in CL/GL interoperability support, we need
|
nuclear@40
|
169 * to copy the output buffer to the OpenGL texture used to displaying
|
nuclear@40
|
170 * the image.
|
nuclear@40
|
171 */
|
nuclear@13
|
172 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
|
nuclear@12
|
173 void *fb = map_mem_buffer(mbuf, MAP_RD);
|
nuclear@13
|
174 if(!fb) {
|
nuclear@13
|
175 fprintf(stderr, "FAILED\n");
|
nuclear@13
|
176 return false;
|
nuclear@13
|
177 }
|
nuclear@13
|
178
|
nuclear@12
|
179 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
|
nuclear@2
|
180 unmap_mem_buffer(mbuf);
|
nuclear@40
|
181 #endif
|
nuclear@32
|
182
|
nuclear@43
|
183 long msec = get_msec() - tm0;
|
nuclear@43
|
184 timing_sample_sum += msec;
|
nuclear@43
|
185 num_timing_samples++;
|
nuclear@43
|
186
|
John@50
|
187 if(dbg_frame_time) {
|
John@50
|
188 printf("rendered in %ld msec\n", msec);
|
John@50
|
189 }
|
nuclear@3
|
190 return true;
|
nuclear@0
|
191 }
|
nuclear@2
|
192
|
nuclear@12
|
193
|
nuclear@12
|
194 void set_xform(float *matrix, float *invtrans)
|
nuclear@12
|
195 {
|
nuclear@12
|
196 CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
|
nuclear@12
|
197 CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
|
nuclear@12
|
198 assert(mbuf_xform && mbuf_invtrans);
|
nuclear@12
|
199
|
nuclear@12
|
200 float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
|
nuclear@12
|
201 memcpy(mem, matrix, 16 * sizeof *mem);
|
nuclear@12
|
202 unmap_mem_buffer(mbuf_xform);
|
nuclear@12
|
203
|
nuclear@12
|
204 mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
|
nuclear@12
|
205 memcpy(mem, invtrans, 16 * sizeof *mem);
|
nuclear@12
|
206 unmap_mem_buffer(mbuf_invtrans);
|
nuclear@8
|
207 }
|
nuclear@8
|
208
|
nuclear@54
|
209
|
nuclear@54
|
210 const RendInfo *get_render_info()
|
nuclear@54
|
211 {
|
nuclear@54
|
212 return &rinf;
|
nuclear@54
|
213 }
|
nuclear@54
|
214
|
nuclear@55
|
215 const RenderStats *get_render_stats()
|
nuclear@55
|
216 {
|
nuclear@55
|
217 return &rstat;
|
nuclear@55
|
218 }
|
nuclear@55
|
219
|
nuclear@55
|
220 void print_render_stats(FILE *fp)
|
nuclear@55
|
221 {
|
nuclear@55
|
222 fprintf(fp, "-- render stats --\n");
|
nuclear@55
|
223 fprintf(fp, "> timing\n");
|
nuclear@55
|
224 fprintf(fp, " render time (msec): %lu\n", rstat.render_time);
|
nuclear@55
|
225 fprintf(fp, " tex update time (msec): %lu\n", rstat.tex_update_time);
|
nuclear@55
|
226 fprintf(fp, "> counters\n");
|
nuclear@55
|
227 fprintf(fp, " AABB tests: %d\n", rstat.aabb_tests);
|
nuclear@55
|
228 fprintf(fp, " AABB tests per ray (min/max/avg): %d/%d/%f\n",
|
nuclear@55
|
229 rstat.min_aabb_tests, rstat.max_aabb_tests, rstat.avg_aabb_tests);
|
nuclear@55
|
230 fprintf(fp, " triangle tests: %d\n", rstat.triangle_tests);
|
nuclear@55
|
231 fprintf(fp, " triangle tests per ray (min/max/avg): %d/%d/%f\n",
|
nuclear@55
|
232 rstat.min_triangle_tests, rstat.max_triangle_tests, rstat.avg_triangle_tests);
|
nuclear@55
|
233 fprintf(fp, " rays cast: %dp %dr %ds (sum: %d)\n", rstat.prim_rays,
|
nuclear@55
|
234 rstat.refl_rays, rstat.shadow_rays, rstat.rays_cast);
|
nuclear@55
|
235 fprintf(fp, " rays per second: %d\n", rstat.rays_per_sec);
|
nuclear@55
|
236 fprintf(fp, " BRDF evaluations: %d\n", rstat.brdf_evals);
|
nuclear@55
|
237 fputc('\n', fp);
|
nuclear@55
|
238 }
|
nuclear@55
|
239
|
nuclear@47
|
240 void set_render_option(int opt, bool val)
|
nuclear@47
|
241 {
|
nuclear@47
|
242 switch(opt) {
|
nuclear@47
|
243 case ROPT_ITER:
|
nuclear@47
|
244 case ROPT_REFL:
|
nuclear@47
|
245 rinf.max_iter = val ? saved_iter_val : 0;
|
nuclear@47
|
246 break;
|
nuclear@47
|
247
|
nuclear@47
|
248 case ROPT_SHAD:
|
nuclear@47
|
249 rinf.cast_shadows = val;
|
nuclear@47
|
250 break;
|
nuclear@47
|
251
|
nuclear@47
|
252 default:
|
nuclear@47
|
253 return;
|
nuclear@47
|
254 }
|
nuclear@47
|
255
|
nuclear@47
|
256 update_render_info();
|
nuclear@47
|
257 }
|
nuclear@47
|
258
|
nuclear@47
|
259 void set_render_option(int opt, int val)
|
nuclear@47
|
260 {
|
nuclear@47
|
261 switch(opt) {
|
nuclear@47
|
262 case ROPT_ITER:
|
nuclear@47
|
263 rinf.max_iter = saved_iter_val = val;
|
nuclear@47
|
264 break;
|
nuclear@47
|
265
|
nuclear@47
|
266 case ROPT_SHAD:
|
nuclear@47
|
267 rinf.cast_shadows = val;
|
nuclear@47
|
268 break;
|
nuclear@47
|
269
|
nuclear@47
|
270 case ROPT_REFL:
|
nuclear@47
|
271 rinf.max_iter = val ? saved_iter_val : 0;
|
nuclear@47
|
272 break;
|
nuclear@47
|
273
|
nuclear@47
|
274 default:
|
nuclear@47
|
275 return;
|
nuclear@47
|
276 }
|
nuclear@47
|
277
|
nuclear@47
|
278 update_render_info();
|
nuclear@47
|
279 }
|
nuclear@47
|
280
|
nuclear@47
|
/* Float flavour of set_render_option: the value is truncated to an
 * integer and handled by the integer overload.
 */
void set_render_option(int opt, float val)
{
	const int ival = (int)val;
	set_render_option(opt, ival);
}
|
nuclear@47
|
285
|
nuclear@47
|
286 bool get_render_option_bool(int opt)
|
nuclear@47
|
287 {
|
nuclear@47
|
288 switch(opt) {
|
nuclear@47
|
289 case ROPT_ITER:
|
nuclear@47
|
290 return rinf.max_iter;
|
nuclear@47
|
291 case ROPT_SHAD:
|
nuclear@47
|
292 return rinf.cast_shadows;
|
nuclear@47
|
293 case ROPT_REFL:
|
nuclear@47
|
294 return rinf.max_iter == saved_iter_val;
|
nuclear@47
|
295 default:
|
nuclear@47
|
296 break;
|
nuclear@47
|
297 }
|
nuclear@47
|
298 return false;
|
nuclear@47
|
299 }
|
nuclear@47
|
300
|
nuclear@47
|
301 int get_render_option_int(int opt)
|
nuclear@47
|
302 {
|
nuclear@47
|
303 switch(opt) {
|
nuclear@47
|
304 case ROPT_ITER:
|
nuclear@47
|
305 return rinf.max_iter;
|
nuclear@47
|
306 case ROPT_SHAD:
|
nuclear@47
|
307 return rinf.cast_shadows ? 1 : 0;
|
nuclear@47
|
308 case ROPT_REFL:
|
nuclear@47
|
309 return rinf.max_iter == saved_iter_val ? 1 : 0;
|
nuclear@47
|
310 default:
|
nuclear@47
|
311 break;
|
nuclear@47
|
312 }
|
nuclear@47
|
313 return -1;
|
nuclear@47
|
314 }
|
nuclear@47
|
315
|
nuclear@47
|
316 float get_render_option_float(int opt)
|
nuclear@47
|
317 {
|
nuclear@47
|
318 return (float)get_render_option_int(opt);
|
nuclear@47
|
319 }
|
nuclear@47
|
320
|
nuclear@47
|
321 static void update_render_info()
|
nuclear@47
|
322 {
|
nuclear@47
|
323 if(!prog) {
|
nuclear@47
|
324 return;
|
nuclear@47
|
325 }
|
nuclear@47
|
326
|
nuclear@47
|
327 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_RENDER_INFO);
|
nuclear@47
|
328 assert(mbuf);
|
nuclear@47
|
329
|
nuclear@47
|
330 RendInfo *rinf_ptr = (RendInfo*)map_mem_buffer(mbuf, MAP_WR);
|
nuclear@47
|
331 *rinf_ptr = rinf;
|
nuclear@47
|
332 unmap_mem_buffer(mbuf);
|
nuclear@47
|
333 }
|
nuclear@47
|
334
|
nuclear@3
|
335 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
|
nuclear@2
|
336 {
|
nuclear@2
|
337 float vfov = M_PI * vfov_deg / 180.0;
|
nuclear@2
|
338 float aspect = (float)w / (float)h;
|
nuclear@2
|
339
|
nuclear@2
|
340 float ysz = 2.0;
|
nuclear@2
|
341 float xsz = aspect * ysz;
|
nuclear@2
|
342
|
nuclear@2
|
343 float px = ((float)x / (float)w) * xsz - xsz / 2.0;
|
nuclear@2
|
344 float py = 1.0 - ((float)y / (float)h) * ysz;
|
nuclear@2
|
345 float pz = 1.0 / tan(0.5 * vfov);
|
nuclear@2
|
346
|
nuclear@43
|
347 float mag = sqrt(px * px + py * py + pz * pz);
|
nuclear@43
|
348
|
nuclear@45
|
349 px = px * RAY_MAG / mag;
|
nuclear@45
|
350 py = py * RAY_MAG / mag;
|
nuclear@45
|
351 pz = pz * RAY_MAG / mag;
|
nuclear@2
|
352
|
nuclear@18
|
353 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
|
nuclear@2
|
354 return ray;
|
nuclear@2
|
355 }
|
nuclear@43
|
356
|
nuclear@54
|
357 #define MIN(a, b) ((a) < (b) ? (a) : (b))
|
nuclear@54
|
358
|
nuclear@43
|
359 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
|
nuclear@43
|
360 {
|
nuclear@45
|
361 int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
|
nuclear@45
|
362 int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
|
nuclear@45
|
363 int xsz = KDIMG_NODE_WIDTH * columns;
|
nuclear@43
|
364
|
nuclear@43
|
365 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
|
nuclear@43
|
366
|
nuclear@43
|
367 float *img = new float[4 * xsz * ysz];
|
nuclear@43
|
368 memset(img, 0, 4 * xsz * ysz * sizeof *img);
|
nuclear@43
|
369
|
nuclear@43
|
370 for(int i=0; i<num_nodes; i++) {
|
nuclear@45
|
371 int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
|
nuclear@45
|
372 int y = i % KDIMG_MAX_HEIGHT;
|
nuclear@45
|
373
|
nuclear@45
|
374 float *ptr = img + (y * xsz + x) * 4;
|
nuclear@43
|
375
|
nuclear@43
|
376 *ptr++ = kdtree[i].aabb.min[0];
|
nuclear@43
|
377 *ptr++ = kdtree[i].aabb.min[1];
|
nuclear@43
|
378 *ptr++ = kdtree[i].aabb.min[2];
|
nuclear@43
|
379 *ptr++ = 0.0;
|
nuclear@43
|
380
|
nuclear@43
|
381 *ptr++ = kdtree[i].aabb.max[0];
|
nuclear@43
|
382 *ptr++ = kdtree[i].aabb.max[1];
|
nuclear@43
|
383 *ptr++ = kdtree[i].aabb.max[2];
|
nuclear@43
|
384 *ptr++ = 0.0;
|
nuclear@43
|
385
|
nuclear@43
|
386 for(int j=0; j<MAX_NODE_FACES; j++) {
|
nuclear@43
|
387 *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;
|
nuclear@43
|
388 }
|
nuclear@43
|
389
|
nuclear@43
|
390 *ptr++ = (float)kdtree[i].num_faces;
|
nuclear@43
|
391 *ptr++ = (float)kdtree[i].left;
|
nuclear@43
|
392 *ptr++ = (float)kdtree[i].right;
|
nuclear@43
|
393 *ptr++ = 0.0;
|
nuclear@43
|
394 }
|
nuclear@43
|
395
|
nuclear@43
|
396 if(xsz_ret) *xsz_ret = xsz;
|
nuclear@43
|
397 if(ysz_ret) *ysz_ret = ysz;
|
nuclear@43
|
398 return img;
|
nuclear@43
|
399 }
|