clray

annotate src/rt.cc @ 43:f9eec11e5acc

shoehorned the kdtree into an OpenCL image and improved performance slightly
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 28 Aug 2010 09:38:49 +0100
parents 057b8575a1c1
children 8047637961a2
rev   line source
nuclear@0 1 #include <stdio.h>
nuclear@8 2 #include <string.h>
nuclear@2 3 #include <math.h>
nuclear@0 4 #include <assert.h>
John@14 5 #include "ogl.h"
nuclear@0 6 #include "ocl.h"
nuclear@22 7 #include "scene.h"
nuclear@32 8 #include "timer.h"
nuclear@0 9
// Kernel argument indices. These are the slot numbers passed to the
// prog->set_arg_*() and prog->get_arg_buffer() calls in this file.
enum {
	KARG_FRAMEBUFFER = 0,	// output image (GL texture when CLGL_INTEROP is defined)
	KARG_RENDER_INFO,		// the RendInfo block
	KARG_FACES,				// scene face buffer
	KARG_MATLIB,			// scene material array
	KARG_LIGHTS,			// lightlist
	KARG_PRIM_RAYS,			// precomputed primary rays
	KARG_XFORM,				// 16-float matrix, written by set_xform()
	KARG_INVTRANS_XFORM,	// 16-float matrix, written by set_xform()
	KARG_KDTREE,			// kd-tree packed into an image by create_kdimage()

	NUM_KERNEL_ARGS			// number of slots above; not an argument itself
};
John@11 24
// Global render parameters. init_renderer() fills one instance and uploads
// it verbatim (sizeof rinf bytes) as the KARG_RENDER_INFO kernel argument.
// NOTE(review): presumably has to match a struct declared in rt.cl - confirm
// before reordering or adding members.
struct RendInfo {
	float ambient[4];	// ambient colour, 4 components
	int xsz;			// framebuffer width in pixels
	int ysz;			// framebuffer height in pixels
	int num_faces;		// taken from Scene::get_num_faces()
	int num_lights;		// number of entries in lightlist
	int max_iter;		// set to 6 by init_renderer()
	int kd_depth;		// result of kdtree_depth() for the scene's tree
};
nuclear@2 32
// A ray as uploaded in the KARG_PRIM_RAYS buffer: two 4-component vectors.
struct Ray {
	float origin[4];	// starting point
	float dir[4];		// direction; get_primary_ray() scales it to length 500
};
nuclear@1 36
// A light source as uploaded in the KARG_LIGHTS buffer.
struct Light {
	float pos[4];		// position, 4 components
	float color[4];		// colour, 4 components
};
nuclear@1 40
nuclear@3 41 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
nuclear@43 42 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
nuclear@3 43
nuclear@13 44 static Face *faces;
nuclear@3 45 static Ray *prim_rays;
nuclear@3 46 static CLProgram *prog;
nuclear@3 47 static int global_size;
nuclear@3 48
nuclear@4 49 static Light lightlist[] = {
nuclear@22 50 {{-8, 15, 18, 0}, {1, 1, 1, 1}}
nuclear@4 51 };
nuclear@4 52
nuclear@7 53
nuclear@4 54 static RendInfo rinf;
nuclear@4 55
nuclear@43 56 static long timing_sample_sum;
nuclear@43 57 static long num_timing_samples;
nuclear@43 58
nuclear@4 59
nuclear@39 60 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
nuclear@0 61 {
nuclear@4 62 // render info
nuclear@22 63 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0;
nuclear@16 64 rinf.ambient[3] = 0.0;
nuclear@16 65
nuclear@4 66 rinf.xsz = xsz;
nuclear@4 67 rinf.ysz = ysz;
nuclear@13 68 rinf.num_faces = scn->get_num_faces();
nuclear@4 69 rinf.num_lights = sizeof lightlist / sizeof *lightlist;
nuclear@4 70 rinf.max_iter = 6;
nuclear@28 71 rinf.kd_depth = kdtree_depth(scn->kdtree);
nuclear@4 72
nuclear@3 73 /* calculate primary rays */
nuclear@3 74 prim_rays = new Ray[xsz * ysz];
nuclear@2 75
nuclear@2 76 for(int i=0; i<ysz; i++) {
nuclear@2 77 for(int j=0; j<xsz; j++) {
nuclear@2 78 prim_rays[i * xsz + j] = get_primary_ray(j, i, xsz, ysz, 45.0);
nuclear@2 79 }
nuclear@0 80 }
nuclear@0 81
nuclear@2 82 /* setup opencl */
nuclear@3 83 prog = new CLProgram("render");
nuclear@3 84 if(!prog->load("rt.cl")) {
nuclear@8 85 return false;
nuclear@0 86 }
nuclear@0 87
nuclear@24 88 if(!(faces = (Face*)scn->get_face_buffer())) {
nuclear@13 89 fprintf(stderr, "failed to create face buffer\n");
nuclear@13 90 return false;
nuclear@13 91 }
nuclear@13 92
nuclear@28 93 const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
nuclear@28 94 if(!kdbuf) {
nuclear@28 95 fprintf(stderr, "failed to create kdtree buffer\n");
nuclear@28 96 return false;
nuclear@28 97 }
nuclear@43 98
nuclear@43 99 int kdimg_xsz, kdimg_ysz;
nuclear@43 100 float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
nuclear@28 101
nuclear@3 102 /* setup argument buffers */
nuclear@41 103 #ifdef CLGL_INTEROP
nuclear@39 104 prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex);
nuclear@41 105 #else
nuclear@41 106 prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz);
nuclear@41 107 #endif
nuclear@12 108 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
John@14 109 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces);
John@14 110 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials());
nuclear@12 111 prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, sizeof lightlist, lightlist);
nuclear@12 112 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
nuclear@12 113 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
nuclear@12 114 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
nuclear@43 115 //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
nuclear@43 116 prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
nuclear@43 117
nuclear@43 118 delete [] kdimg_pixels;
nuclear@43 119
nuclear@12 120
John@14 121 if(prog->get_num_args() < NUM_KERNEL_ARGS) {
John@14 122 return false;
John@14 123 }
John@14 124
nuclear@16 125 if(!prog->build()) {
nuclear@16 126 return false;
nuclear@16 127 }
nuclear@16 128
nuclear@12 129 delete [] prim_rays;
nuclear@2 130
nuclear@3 131 global_size = xsz * ysz;
nuclear@3 132 return true;
nuclear@3 133 }
nuclear@3 134
nuclear@3 135 void destroy_renderer()
nuclear@3 136 {
nuclear@3 137 delete prog;
nuclear@43 138
nuclear@43 139 printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);
nuclear@3 140 }
nuclear@3 141
nuclear@3 142 bool render()
nuclear@3 143 {
nuclear@39 144 // XXX do we need to call glFinish ?
nuclear@39 145
nuclear@32 146 long tm0 = get_msec();
nuclear@32 147
nuclear@40 148 #ifdef CLGL_INTEROP
nuclear@39 149 cl_event ev;
nuclear@39 150 CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
nuclear@39 151
nuclear@39 152 if(!acquire_gl_object(texbuf, &ev)) {
nuclear@39 153 return false;
nuclear@39 154 }
nuclear@39 155
nuclear@39 156 // make sure that we will wait for the acquire to finish before running
nuclear@39 157 prog->set_wait_event(ev);
nuclear@40 158 #endif
nuclear@39 159
nuclear@3 160 if(!prog->run(1, global_size)) {
nuclear@3 161 return false;
nuclear@0 162 }
John@15 163
nuclear@40 164 #ifdef CLGL_INTEROP
nuclear@39 165 if(!release_gl_object(texbuf, &ev)) {
nuclear@39 166 return false;
nuclear@39 167 }
nuclear@39 168 clWaitForEvents(1, &ev);
nuclear@40 169 #endif
nuclear@39 170
nuclear@40 171 #ifndef CLGL_INTEROP
nuclear@40 172 /* if we don't compile in CL/GL interoperability support, we need
nuclear@40 173 * to copy the output buffer to the OpenGL texture used to displaying
nuclear@40 174 * the image.
nuclear@40 175 */
nuclear@13 176 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
nuclear@12 177 void *fb = map_mem_buffer(mbuf, MAP_RD);
nuclear@13 178 if(!fb) {
nuclear@13 179 fprintf(stderr, "FAILED\n");
nuclear@13 180 return false;
nuclear@13 181 }
nuclear@13 182
nuclear@12 183 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
nuclear@2 184 unmap_mem_buffer(mbuf);
nuclear@40 185 #endif
nuclear@32 186
nuclear@43 187 long msec = get_msec() - tm0;
nuclear@43 188 timing_sample_sum += msec;
nuclear@43 189 num_timing_samples++;
nuclear@43 190
nuclear@43 191 printf("rendered in %ld msec\n", msec);
nuclear@3 192 return true;
nuclear@0 193 }
nuclear@2 194
nuclear@27 195 #define MIN(a, b) ((a) < (b) ? (a) : (b))
nuclear@21 196 static void dbg_set_gl_material(Material *mat)
nuclear@21 197 {
nuclear@21 198 static Material def_mat = {{0.7, 0.7, 0.7, 1}, {0, 0, 0, 0}, 0, 0, 0};
nuclear@21 199
nuclear@21 200 if(!mat) mat = &def_mat;
nuclear@21 201
nuclear@21 202 glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat->kd);
nuclear@21 203 glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, mat->ks);
nuclear@27 204 glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, MIN(mat->spow, 128.0f));
nuclear@21 205 }
nuclear@21 206
nuclear@27 207 void dbg_render_gl(Scene *scn, bool show_tree, bool show_obj)
nuclear@8 208 {
nuclear@22 209 glPushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_LIGHTING_BIT);
nuclear@8 210
nuclear@21 211 for(int i=0; i<rinf.num_lights; i++) {
nuclear@21 212 float lpos[4];
nuclear@21 213
nuclear@21 214 memcpy(lpos, lightlist[i].pos, sizeof lpos);
nuclear@21 215 lpos[3] = 1.0;
nuclear@21 216
nuclear@21 217 glLightfv(GL_LIGHT0 + i, GL_POSITION, lpos);
nuclear@21 218 glLightfv(GL_LIGHT0 + i, GL_DIFFUSE, lightlist[i].color);
nuclear@22 219 glEnable(GL_LIGHT0 + i);
nuclear@21 220 }
nuclear@21 221
nuclear@12 222 glDisable(GL_TEXTURE_2D);
nuclear@12 223 glEnable(GL_DEPTH_TEST);
John@15 224 glEnable(GL_LIGHTING);
nuclear@12 225
nuclear@12 226 glMatrixMode(GL_PROJECTION);
nuclear@12 227 glPushMatrix();
nuclear@12 228 glLoadIdentity();
nuclear@12 229 gluPerspective(45.0, (float)rinf.xsz / (float)rinf.ysz, 0.5, 1000.0);
nuclear@12 230
nuclear@27 231 if(show_obj) {
nuclear@27 232 Material *materials = scn->get_materials();
John@14 233
nuclear@27 234 int num_faces = scn->get_num_faces();
nuclear@27 235 int cur_mat = -1;
nuclear@21 236
nuclear@27 237 for(int i=0; i<num_faces; i++) {
nuclear@27 238 if(faces[i].matid != cur_mat) {
nuclear@27 239 if(cur_mat != -1) {
nuclear@27 240 glEnd();
nuclear@27 241 }
nuclear@27 242 dbg_set_gl_material(materials ? materials + faces[i].matid : 0);
nuclear@27 243 cur_mat = faces[i].matid;
nuclear@27 244 glBegin(GL_TRIANGLES);
nuclear@21 245 }
nuclear@27 246
nuclear@27 247 for(int j=0; j<3; j++) {
nuclear@27 248 glNormal3fv(faces[i].v[j].normal);
nuclear@27 249 glVertex3fv(faces[i].v[j].pos);
nuclear@27 250 }
John@14 251 }
nuclear@27 252 glEnd();
nuclear@27 253 }
nuclear@12 254
nuclear@27 255 if(show_tree) {
nuclear@27 256 scn->draw_kdtree();
nuclear@12 257 }
nuclear@12 258
nuclear@12 259 glPopMatrix();
nuclear@12 260 glPopAttrib();
nuclear@22 261
nuclear@22 262 assert(glGetError() == GL_NO_ERROR);
nuclear@12 263 }
nuclear@12 264
nuclear@12 265 void set_xform(float *matrix, float *invtrans)
nuclear@12 266 {
nuclear@12 267 CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
nuclear@12 268 CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
nuclear@12 269 assert(mbuf_xform && mbuf_invtrans);
nuclear@12 270
nuclear@12 271 float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
nuclear@12 272 memcpy(mem, matrix, 16 * sizeof *mem);
nuclear@12 273 unmap_mem_buffer(mbuf_xform);
nuclear@12 274
nuclear@12 275 mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
nuclear@12 276 memcpy(mem, invtrans, 16 * sizeof *mem);
nuclear@12 277 unmap_mem_buffer(mbuf_invtrans);
nuclear@8 278 }
nuclear@8 279
nuclear@3 280 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
nuclear@2 281 {
nuclear@2 282 float vfov = M_PI * vfov_deg / 180.0;
nuclear@2 283 float aspect = (float)w / (float)h;
nuclear@2 284
nuclear@2 285 float ysz = 2.0;
nuclear@2 286 float xsz = aspect * ysz;
nuclear@2 287
nuclear@2 288 float px = ((float)x / (float)w) * xsz - xsz / 2.0;
nuclear@2 289 float py = 1.0 - ((float)y / (float)h) * ysz;
nuclear@2 290 float pz = 1.0 / tan(0.5 * vfov);
nuclear@2 291
nuclear@43 292 float mag = sqrt(px * px + py * py + pz * pz);
nuclear@43 293
nuclear@43 294 px = px * 500.0 / mag;
nuclear@43 295 py = py * 500.0 / mag;
nuclear@43 296 pz = pz * 500.0 / mag;
nuclear@2 297
nuclear@18 298 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
nuclear@2 299 return ray;
nuclear@2 300 }
nuclear@43 301
/* Returns the smallest power of two that is greater than or equal to x.
 *
 * Any x <= 1 (including zero and negative values) yields 1, so the result
 * is always usable as a non-zero size. x must not exceed 2^30, the largest
 * power of two representable in a 32-bit int.
 */
static int next_pow2(int x)
{
	if(x <= 1) {
		return 1;
	}
	assert(x <= (1 << 30));

	// smear the highest set bit of x - 1 into every lower bit, then add one;
	// done on an unsigned value to keep the shifts well defined
	unsigned int v = (unsigned int)x - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	return (int)(v + 1);
}
nuclear@43 312
nuclear@43 313 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
nuclear@43 314 {
nuclear@43 315 int xsz = 16;
nuclear@43 316 int ysz = next_pow2(num_nodes);
nuclear@43 317
nuclear@43 318 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
nuclear@43 319
nuclear@43 320 float *img = new float[4 * xsz * ysz];
nuclear@43 321 memset(img, 0, 4 * xsz * ysz * sizeof *img);
nuclear@43 322
nuclear@43 323 for(int i=0; i<num_nodes; i++) {
nuclear@43 324 float *ptr = img + i * 4 * xsz;
nuclear@43 325
nuclear@43 326 *ptr++ = kdtree[i].aabb.min[0];
nuclear@43 327 *ptr++ = kdtree[i].aabb.min[1];
nuclear@43 328 *ptr++ = kdtree[i].aabb.min[2];
nuclear@43 329 *ptr++ = 0.0;
nuclear@43 330
nuclear@43 331 *ptr++ = kdtree[i].aabb.max[0];
nuclear@43 332 *ptr++ = kdtree[i].aabb.max[1];
nuclear@43 333 *ptr++ = kdtree[i].aabb.max[2];
nuclear@43 334 *ptr++ = 0.0;
nuclear@43 335
nuclear@43 336 for(int j=0; j<MAX_NODE_FACES; j++) {
nuclear@43 337 *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;
nuclear@43 338 }
nuclear@43 339
nuclear@43 340 *ptr++ = (float)kdtree[i].num_faces;
nuclear@43 341 *ptr++ = (float)kdtree[i].left;
nuclear@43 342 *ptr++ = (float)kdtree[i].right;
nuclear@43 343 *ptr++ = 0.0;
nuclear@43 344 }
nuclear@43 345
nuclear@43 346 if(xsz_ret) *xsz_ret = xsz;
nuclear@43 347 if(ysz_ret) *ysz_ret = ysz;
nuclear@43 348 return img;
nuclear@43 349 }