clray

view src/rt.cc @ 56:e3b4457dc4d2

added glFinish after swap-buffers to make the program absolutely correct in regards to mediating usage of the shared GL/CL texture image
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 06 Sep 2010 05:40:47 +0100
parents 55b30d8b6805
children 6a30f27fa1e6
line source
1 #include <stdio.h>
2 #include <string.h>
3 #include <math.h>
4 #include <assert.h>
5 #include "rt.h"
6 #include "ogl.h"
7 #include "ocl.h"
8 #include "scene.h"
9 #include "timer.h"
10 #include "common.h"
// Argument slots of the "render" OpenCL kernel.
// init_renderer() binds one buffer or image to each slot, in this order.
enum {
	KARG_FRAMEBUFFER = 0,	// output image (or the shared GL texture with CLGL_INTEROP)
	KARG_RENDER_INFO,	// RendInfo structure
	KARG_FACES,		// scene face buffer
	KARG_MATLIB,		// scene material array
	KARG_LIGHTS,		// light list
	KARG_PRIM_RAYS,		// one primary ray per pixel
	KARG_XFORM,		// 16 floats, written by set_xform()
	KARG_INVTRANS_XFORM,	// 16 floats, written by set_xform()
	KARG_KDTREE,		// kd-tree packed into an image by create_kdimage()

	NUM_KERNEL_ARGS		// number of slots above; must stay last
};
// Global render settings. The whole structure is copied verbatim into the
// KARG_RENDER_INFO kernel buffer, so field order and types must not change
// (presumably mirrored by a structure in rt.cl -- confirm before editing).
struct RendInfo {
	float ambient[4];	// ambient light color
	int xsz;		// image width in pixels
	int ysz;		// image height in pixels
	int num_faces;		// number of faces in the face buffer
	int num_lights;		// number of entries in the light list
	int max_iter;		// iteration limit; 0 when reflections are switched off
	int cast_shadows;	// non-zero to enable shadows
};
// A ray as uploaded to the KARG_PRIM_RAYS kernel buffer:
// two 4-component float vectors, origin first.
struct Ray {
	float origin[4];
	float dir[4];
};
// A light source as uploaded to the KARG_LIGHTS kernel buffer:
// two 4-component float vectors, position first.
struct Light {
	float pos[4];
	float color[4];
};
43 static void update_render_info();
44 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
45 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
47 static Face *faces;
48 static Ray *prim_rays;
49 static CLProgram *prog;
50 static int global_size;
52 static Light lightlist[] = {
53 {{-8, 15, 18, 0}, {1, 1, 1, 1}}
54 };
57 static RendInfo rinf;
58 static int saved_iter_val;
60 static long timing_sample_sum;
61 static long num_timing_samples;
63 extern bool dbg_frame_time;
66 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
67 {
68 // render info
69 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0;
70 rinf.ambient[3] = 0.0;
72 rinf.xsz = xsz;
73 rinf.ysz = ysz;
74 rinf.num_faces = scn->get_num_faces();
75 rinf.num_lights = sizeof lightlist / sizeof *lightlist;
76 rinf.max_iter = saved_iter_val = 6;
77 rinf.cast_shadows = true;
79 /* calculate primary rays */
80 prim_rays = new Ray[xsz * ysz];
82 for(int i=0; i<ysz; i++) {
83 for(int j=0; j<xsz; j++) {
84 prim_rays[i * xsz + j] = get_primary_ray(j, i, xsz, ysz, 45.0);
85 }
86 }
88 /* setup opencl */
89 prog = new CLProgram("render");
90 if(!prog->load("rt.cl")) {
91 return false;
92 }
94 if(!(faces = (Face*)scn->get_face_buffer())) {
95 fprintf(stderr, "failed to create face buffer\n");
96 return false;
97 }
99 const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
100 if(!kdbuf) {
101 fprintf(stderr, "failed to create kdtree buffer\n");
102 return false;
103 }
105 int kdimg_xsz, kdimg_ysz;
106 float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
108 /* setup argument buffers */
109 #ifdef CLGL_INTEROP
110 prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex);
111 #else
112 prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz);
113 #endif
114 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
115 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces);
116 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials());
117 prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, sizeof lightlist, lightlist);
118 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
119 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
120 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
121 //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
122 prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
124 delete [] kdimg_pixels;
127 if(prog->get_num_args() < NUM_KERNEL_ARGS) {
128 return false;
129 }
131 const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
132 if(!prog->build(opt)) {
133 return false;
134 }
136 delete [] prim_rays;
138 global_size = xsz * ysz;
139 return true;
140 }
142 void destroy_renderer()
143 {
144 delete prog;
146 printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);
147 }
149 bool render()
150 {
151 // XXX do we need to call glFinish ?
153 long tm0 = get_msec();
155 #ifdef CLGL_INTEROP
156 cl_event ev;
157 CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
159 if(!acquire_gl_object(texbuf, &ev)) {
160 return false;
161 }
163 // make sure that we will wait for the acquire to finish before running
164 prog->set_wait_event(ev);
165 #endif
167 if(!prog->run(1, global_size)) {
168 return false;
169 }
171 #ifdef CLGL_INTEROP
172 if(!release_gl_object(texbuf, &ev)) {
173 return false;
174 }
175 clWaitForEvents(1, &ev);
176 #endif
178 #ifndef CLGL_INTEROP
179 /* if we don't compile in CL/GL interoperability support, we need
180 * to copy the output buffer to the OpenGL texture used to displaying
181 * the image.
182 */
183 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
184 void *fb = map_mem_buffer(mbuf, MAP_RD);
185 if(!fb) {
186 fprintf(stderr, "FAILED\n");
187 return false;
188 }
190 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
191 unmap_mem_buffer(mbuf);
192 #endif
194 long msec = get_msec() - tm0;
195 timing_sample_sum += msec;
196 num_timing_samples++;
198 if(dbg_frame_time) {
199 printf("rendered in %ld msec\n", msec);
200 }
201 return true;
202 }
204 #define MIN(a, b) ((a) < (b) ? (a) : (b))
205 static void dbg_set_gl_material(Material *mat)
206 {
207 static Material def_mat = {{0.7, 0.7, 0.7, 1}, {0, 0, 0, 0}, 0, 0, 0};
209 if(!mat) mat = &def_mat;
211 glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat->kd);
212 glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, mat->ks);
213 glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, MIN(mat->spow, 128.0f));
214 }
216 void dbg_render_gl(Scene *scn, bool show_tree, bool show_obj)
217 {
218 glPushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_LIGHTING_BIT);
220 for(int i=0; i<rinf.num_lights; i++) {
221 float lpos[4];
223 memcpy(lpos, lightlist[i].pos, sizeof lpos);
224 lpos[3] = 1.0;
226 glLightfv(GL_LIGHT0 + i, GL_POSITION, lpos);
227 glLightfv(GL_LIGHT0 + i, GL_DIFFUSE, lightlist[i].color);
228 glEnable(GL_LIGHT0 + i);
229 }
231 glDisable(GL_TEXTURE_2D);
232 glEnable(GL_DEPTH_TEST);
233 glEnable(GL_LIGHTING);
235 glMatrixMode(GL_PROJECTION);
236 glPushMatrix();
237 glLoadIdentity();
238 gluPerspective(45.0, (float)rinf.xsz / (float)rinf.ysz, 0.5, 1000.0);
240 if(show_obj) {
241 Material *materials = scn->get_materials();
243 int num_faces = scn->get_num_faces();
244 int cur_mat = -1;
246 for(int i=0; i<num_faces; i++) {
247 if(faces[i].matid != cur_mat) {
248 if(cur_mat != -1) {
249 glEnd();
250 }
251 dbg_set_gl_material(materials ? materials + faces[i].matid : 0);
252 cur_mat = faces[i].matid;
253 glBegin(GL_TRIANGLES);
254 }
256 for(int j=0; j<3; j++) {
257 glNormal3fv(faces[i].v[j].normal);
258 glVertex3fv(faces[i].v[j].pos);
259 }
260 }
261 glEnd();
262 }
264 if(show_tree) {
265 scn->draw_kdtree();
266 }
268 glPopMatrix();
269 glPopAttrib();
271 assert(glGetError() == GL_NO_ERROR);
272 }
274 void set_xform(float *matrix, float *invtrans)
275 {
276 CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
277 CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
278 assert(mbuf_xform && mbuf_invtrans);
280 float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
281 memcpy(mem, matrix, 16 * sizeof *mem);
282 unmap_mem_buffer(mbuf_xform);
284 mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
285 memcpy(mem, invtrans, 16 * sizeof *mem);
286 unmap_mem_buffer(mbuf_invtrans);
287 }
289 void set_render_option(int opt, bool val)
290 {
291 switch(opt) {
292 case ROPT_ITER:
293 case ROPT_REFL:
294 rinf.max_iter = val ? saved_iter_val : 0;
295 break;
297 case ROPT_SHAD:
298 rinf.cast_shadows = val;
299 break;
301 default:
302 return;
303 }
305 update_render_info();
306 }
308 void set_render_option(int opt, int val)
309 {
310 switch(opt) {
311 case ROPT_ITER:
312 rinf.max_iter = saved_iter_val = val;
313 break;
315 case ROPT_SHAD:
316 rinf.cast_shadows = val;
317 break;
319 case ROPT_REFL:
320 rinf.max_iter = val ? saved_iter_val : 0;
321 break;
323 default:
324 return;
325 }
327 update_render_info();
328 }
/* Float flavor of set_render_option: the value is truncated to an integer
 * and handed to the int overload.
 */
void set_render_option(int opt, float val)
{
	int ival = (int)val;
	set_render_option(opt, ival);
}
335 bool get_render_option_bool(int opt)
336 {
337 switch(opt) {
338 case ROPT_ITER:
339 return rinf.max_iter;
340 case ROPT_SHAD:
341 return rinf.cast_shadows;
342 case ROPT_REFL:
343 return rinf.max_iter == saved_iter_val;
344 default:
345 break;
346 }
347 return false;
348 }
350 int get_render_option_int(int opt)
351 {
352 switch(opt) {
353 case ROPT_ITER:
354 return rinf.max_iter;
355 case ROPT_SHAD:
356 return rinf.cast_shadows ? 1 : 0;
357 case ROPT_REFL:
358 return rinf.max_iter == saved_iter_val ? 1 : 0;
359 default:
360 break;
361 }
362 return -1;
363 }
365 float get_render_option_float(int opt)
366 {
367 return (float)get_render_option_int(opt);
368 }
370 static void update_render_info()
371 {
372 if(!prog) {
373 return;
374 }
376 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_RENDER_INFO);
377 assert(mbuf);
379 RendInfo *rinf_ptr = (RendInfo*)map_mem_buffer(mbuf, MAP_WR);
380 *rinf_ptr = rinf;
381 unmap_mem_buffer(mbuf);
382 }
384 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
385 {
386 float vfov = M_PI * vfov_deg / 180.0;
387 float aspect = (float)w / (float)h;
389 float ysz = 2.0;
390 float xsz = aspect * ysz;
392 float px = ((float)x / (float)w) * xsz - xsz / 2.0;
393 float py = 1.0 - ((float)y / (float)h) * ysz;
394 float pz = 1.0 / tan(0.5 * vfov);
396 float mag = sqrt(px * px + py * py + pz * pz);
398 px = px * RAY_MAG / mag;
399 py = py * RAY_MAG / mag;
400 pz = pz * RAY_MAG / mag;
402 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
403 return ray;
404 }
406 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
407 {
408 int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
409 int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
410 int xsz = KDIMG_NODE_WIDTH * columns;
412 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
414 float *img = new float[4 * xsz * ysz];
415 memset(img, 0, 4 * xsz * ysz * sizeof *img);
417 for(int i=0; i<num_nodes; i++) {
418 int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
419 int y = i % KDIMG_MAX_HEIGHT;
421 float *ptr = img + (y * xsz + x) * 4;
423 *ptr++ = kdtree[i].aabb.min[0];
424 *ptr++ = kdtree[i].aabb.min[1];
425 *ptr++ = kdtree[i].aabb.min[2];
426 *ptr++ = 0.0;
428 *ptr++ = kdtree[i].aabb.max[0];
429 *ptr++ = kdtree[i].aabb.max[1];
430 *ptr++ = kdtree[i].aabb.max[2];
431 *ptr++ = 0.0;
433 for(int j=0; j<MAX_NODE_FACES; j++) {
434 *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;
435 }
437 *ptr++ = (float)kdtree[i].num_faces;
438 *ptr++ = (float)kdtree[i].left;
439 *ptr++ = (float)kdtree[i].right;
440 *ptr++ = 0.0;
441 }
443 if(xsz_ret) *xsz_ret = xsz;
444 if(ysz_ret) *ysz_ret = ysz;
445 return img;
446 }