clray

view src/rt.cc @ 45:8047637961a2

fixed the issue of hitting maximum vertical image sizes for large kdtrees
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 29 Aug 2010 04:20:42 +0100
parents f9eec11e5acc
children 30bf84881553
line source
1 #include <stdio.h>
2 #include <string.h>
3 #include <math.h>
4 #include <assert.h>
5 #include "ogl.h"
6 #include "ocl.h"
7 #include "scene.h"
8 #include "timer.h"
9 #include "common.h"
// kernel arguments
// Each constant is the index of one argument of the "render" kernel, in the
// order in which they are bound by init_renderer.
enum {
	KARG_FRAMEBUFFER = 0,	// output image (or GL texture with CLGL_INTEROP)
	KARG_RENDER_INFO,	// RendInfo structure
	KARG_FACES,		// scene face buffer
	KARG_MATLIB,		// material array
	KARG_LIGHTS,		// light array
	KARG_PRIM_RAYS,		// precalculated primary rays
	KARG_XFORM,		// 16-float matrix, see set_xform
	KARG_INVTRANS_XFORM,	// 16-float matrix, see set_xform
	KARG_KDTREE,		// kd-tree nodes packed in an image

	NUM_KERNEL_ARGS		// argument count, must stay last
};
/* Global rendering parameters; the whole structure is handed to the kernel
 * as the KARG_RENDER_INFO argument, so the field order must not change.
 */
struct RendInfo {
	float ambient[4];	/* ambient colour */
	int xsz;		/* output image width in pixels */
	int ysz;		/* output image height in pixels */
	int num_faces;		/* number of faces in the scene */
	int num_lights;		/* number of entries in the light list */
	int max_iter;		/* iteration limit (presumably the ray recursion depth -- TODO confirm in rt.cl) */
	int kd_depth;		/* depth of the scene kd-tree */
};
/* A ray as laid out in the primary ray buffer: two 4-component vectors. */
struct Ray {
	float origin[4];	/* starting point */
	float dir[4];		/* direction */
};
/* A light source as laid out in the light buffer: two 4-component vectors. */
struct Light {
	float pos[4];		/* position */
	float color[4];		/* colour */
};
42 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
43 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
45 static Face *faces;
46 static Ray *prim_rays;
47 static CLProgram *prog;
48 static int global_size;
50 static Light lightlist[] = {
51 {{-8, 15, 18, 0}, {1, 1, 1, 1}}
52 };
55 static RendInfo rinf;
57 static long timing_sample_sum;
58 static long num_timing_samples;
61 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
62 {
63 // render info
64 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0;
65 rinf.ambient[3] = 0.0;
67 rinf.xsz = xsz;
68 rinf.ysz = ysz;
69 rinf.num_faces = scn->get_num_faces();
70 rinf.num_lights = sizeof lightlist / sizeof *lightlist;
71 rinf.max_iter = 6;
72 rinf.kd_depth = kdtree_depth(scn->kdtree);
74 /* calculate primary rays */
75 prim_rays = new Ray[xsz * ysz];
77 for(int i=0; i<ysz; i++) {
78 for(int j=0; j<xsz; j++) {
79 prim_rays[i * xsz + j] = get_primary_ray(j, i, xsz, ysz, 45.0);
80 }
81 }
83 /* setup opencl */
84 prog = new CLProgram("render");
85 if(!prog->load("rt.cl")) {
86 return false;
87 }
89 if(!(faces = (Face*)scn->get_face_buffer())) {
90 fprintf(stderr, "failed to create face buffer\n");
91 return false;
92 }
94 const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
95 if(!kdbuf) {
96 fprintf(stderr, "failed to create kdtree buffer\n");
97 return false;
98 }
100 int kdimg_xsz, kdimg_ysz;
101 float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
103 /* setup argument buffers */
104 #ifdef CLGL_INTEROP
105 prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex);
106 #else
107 prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz);
108 #endif
109 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
110 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces);
111 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials());
112 prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, sizeof lightlist, lightlist);
113 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
114 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
115 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
116 //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
117 prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
119 delete [] kdimg_pixels;
122 if(prog->get_num_args() < NUM_KERNEL_ARGS) {
123 return false;
124 }
126 const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
127 if(!prog->build(opt)) {
128 return false;
129 }
131 delete [] prim_rays;
133 global_size = xsz * ysz;
134 return true;
135 }
137 void destroy_renderer()
138 {
139 delete prog;
141 printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);
142 }
144 bool render()
145 {
146 // XXX do we need to call glFinish ?
148 long tm0 = get_msec();
150 #ifdef CLGL_INTEROP
151 cl_event ev;
152 CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
154 if(!acquire_gl_object(texbuf, &ev)) {
155 return false;
156 }
158 // make sure that we will wait for the acquire to finish before running
159 prog->set_wait_event(ev);
160 #endif
162 if(!prog->run(1, global_size)) {
163 return false;
164 }
166 #ifdef CLGL_INTEROP
167 if(!release_gl_object(texbuf, &ev)) {
168 return false;
169 }
170 clWaitForEvents(1, &ev);
171 #endif
173 #ifndef CLGL_INTEROP
174 /* if we don't compile in CL/GL interoperability support, we need
175 * to copy the output buffer to the OpenGL texture used to displaying
176 * the image.
177 */
178 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
179 void *fb = map_mem_buffer(mbuf, MAP_RD);
180 if(!fb) {
181 fprintf(stderr, "FAILED\n");
182 return false;
183 }
185 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
186 unmap_mem_buffer(mbuf);
187 #endif
189 long msec = get_msec() - tm0;
190 timing_sample_sum += msec;
191 num_timing_samples++;
193 printf("rendered in %ld msec\n", msec);
194 return true;
195 }
197 #define MIN(a, b) ((a) < (b) ? (a) : (b))
198 static void dbg_set_gl_material(Material *mat)
199 {
200 static Material def_mat = {{0.7, 0.7, 0.7, 1}, {0, 0, 0, 0}, 0, 0, 0};
202 if(!mat) mat = &def_mat;
204 glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat->kd);
205 glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, mat->ks);
206 glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, MIN(mat->spow, 128.0f));
207 }
209 void dbg_render_gl(Scene *scn, bool show_tree, bool show_obj)
210 {
211 glPushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_LIGHTING_BIT);
213 for(int i=0; i<rinf.num_lights; i++) {
214 float lpos[4];
216 memcpy(lpos, lightlist[i].pos, sizeof lpos);
217 lpos[3] = 1.0;
219 glLightfv(GL_LIGHT0 + i, GL_POSITION, lpos);
220 glLightfv(GL_LIGHT0 + i, GL_DIFFUSE, lightlist[i].color);
221 glEnable(GL_LIGHT0 + i);
222 }
224 glDisable(GL_TEXTURE_2D);
225 glEnable(GL_DEPTH_TEST);
226 glEnable(GL_LIGHTING);
228 glMatrixMode(GL_PROJECTION);
229 glPushMatrix();
230 glLoadIdentity();
231 gluPerspective(45.0, (float)rinf.xsz / (float)rinf.ysz, 0.5, 1000.0);
233 if(show_obj) {
234 Material *materials = scn->get_materials();
236 int num_faces = scn->get_num_faces();
237 int cur_mat = -1;
239 for(int i=0; i<num_faces; i++) {
240 if(faces[i].matid != cur_mat) {
241 if(cur_mat != -1) {
242 glEnd();
243 }
244 dbg_set_gl_material(materials ? materials + faces[i].matid : 0);
245 cur_mat = faces[i].matid;
246 glBegin(GL_TRIANGLES);
247 }
249 for(int j=0; j<3; j++) {
250 glNormal3fv(faces[i].v[j].normal);
251 glVertex3fv(faces[i].v[j].pos);
252 }
253 }
254 glEnd();
255 }
257 if(show_tree) {
258 scn->draw_kdtree();
259 }
261 glPopMatrix();
262 glPopAttrib();
264 assert(glGetError() == GL_NO_ERROR);
265 }
267 void set_xform(float *matrix, float *invtrans)
268 {
269 CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
270 CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
271 assert(mbuf_xform && mbuf_invtrans);
273 float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
274 memcpy(mem, matrix, 16 * sizeof *mem);
275 unmap_mem_buffer(mbuf_xform);
277 mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
278 memcpy(mem, invtrans, 16 * sizeof *mem);
279 unmap_mem_buffer(mbuf_invtrans);
280 }
282 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
283 {
284 float vfov = M_PI * vfov_deg / 180.0;
285 float aspect = (float)w / (float)h;
287 float ysz = 2.0;
288 float xsz = aspect * ysz;
290 float px = ((float)x / (float)w) * xsz - xsz / 2.0;
291 float py = 1.0 - ((float)y / (float)h) * ysz;
292 float pz = 1.0 / tan(0.5 * vfov);
294 float mag = sqrt(px * px + py * py + pz * pz);
296 px = px * RAY_MAG / mag;
297 py = py * RAY_MAG / mag;
298 pz = pz * RAY_MAG / mag;
300 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
301 return ray;
302 }
304 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
305 {
306 int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
307 int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
308 int xsz = KDIMG_NODE_WIDTH * columns;
310 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
312 float *img = new float[4 * xsz * ysz];
313 memset(img, 0, 4 * xsz * ysz * sizeof *img);
315 for(int i=0; i<num_nodes; i++) {
316 int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
317 int y = i % KDIMG_MAX_HEIGHT;
319 float *ptr = img + (y * xsz + x) * 4;
321 *ptr++ = kdtree[i].aabb.min[0];
322 *ptr++ = kdtree[i].aabb.min[1];
323 *ptr++ = kdtree[i].aabb.min[2];
324 *ptr++ = 0.0;
326 *ptr++ = kdtree[i].aabb.max[0];
327 *ptr++ = kdtree[i].aabb.max[1];
328 *ptr++ = kdtree[i].aabb.max[2];
329 *ptr++ = 0.0;
331 for(int j=0; j<MAX_NODE_FACES; j++) {
332 *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;
333 }
335 *ptr++ = (float)kdtree[i].num_faces;
336 *ptr++ = (float)kdtree[i].left;
337 *ptr++ = (float)kdtree[i].right;
338 *ptr++ = 0.0;
339 }
341 if(xsz_ret) *xsz_ret = xsz;
342 if(ysz_ret) *ysz_ret = ysz;
343 return img;
344 }