clray

annotate src/rt.cc @ 62:d9520da6b801

minor readme fix
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 28 Dec 2015 10:31:58 +0200
parents 3d13924b22e6
children
rev   line source
nuclear@0 1 #include <stdio.h>
nuclear@8 2 #include <string.h>
nuclear@2 3 #include <math.h>
nuclear@58 4 #include <limits.h>
nuclear@0 5 #include <assert.h>
nuclear@47 6 #include "rt.h"
John@14 7 #include "ogl.h"
nuclear@0 8 #include "ocl.h"
nuclear@22 9 #include "scene.h"
nuclear@32 10 #include "timer.h"
nuclear@45 11 #include "common.h"
nuclear@0 12
// Positional indices of the arguments bound to the "render" OpenCL kernel.
// init_renderer() binds one buffer/image per index and gives up if fewer
// than NUM_KERNEL_ARGS arguments end up registered with the program.
enum {
	KARG_FRAMEBUFFER = 0,	// output image (a GL texture when CLGL_INTEROP is defined)
	KARG_RENDER_INFO,	// the RendInfo block
	KARG_FACES,		// scene face array
	KARG_MATLIB,		// scene material array
	KARG_LIGHTS,		// scene light array
	KARG_PRIM_RAYS,		// one primary ray per pixel
	KARG_XFORM,		// 16-float matrix, filled in by set_xform()
	KARG_INVTRANS_XFORM,	// 16-float matrix, filled in by set_xform()
	KARG_KDTREE,		// kd-tree nodes packed into a float image

	NUM_KERNEL_ARGS		// number of entries above
};
John@11 27
nuclear@47 28 static void update_render_info();
nuclear@3 29 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
nuclear@43 30 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
nuclear@3 31
nuclear@13 32 static Face *faces;
nuclear@3 33 static Ray *prim_rays;
nuclear@3 34 static CLProgram *prog;
nuclear@3 35 static int global_size;
nuclear@3 36
nuclear@7 37
nuclear@4 38 static RendInfo rinf;
nuclear@55 39 static RenderStats rstat;
nuclear@47 40 static int saved_iter_val;
nuclear@4 41
nuclear@43 42 static long timing_sample_sum;
nuclear@43 43 static long num_timing_samples;
nuclear@43 44
nuclear@4 45
nuclear@39 46 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
nuclear@0 47 {
nuclear@4 48 // render info
nuclear@22 49 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0;
nuclear@16 50 rinf.ambient[3] = 0.0;
nuclear@16 51
nuclear@4 52 rinf.xsz = xsz;
nuclear@4 53 rinf.ysz = ysz;
nuclear@13 54 rinf.num_faces = scn->get_num_faces();
nuclear@54 55 rinf.num_lights = scn->get_num_lights();
nuclear@47 56 rinf.max_iter = saved_iter_val = 6;
nuclear@47 57 rinf.cast_shadows = true;
nuclear@4 58
nuclear@3 59 /* calculate primary rays */
nuclear@3 60 prim_rays = new Ray[xsz * ysz];
nuclear@2 61
nuclear@2 62 for(int i=0; i<ysz; i++) {
nuclear@2 63 for(int j=0; j<xsz; j++) {
nuclear@2 64 prim_rays[i * xsz + j] = get_primary_ray(j, i, xsz, ysz, 45.0);
nuclear@2 65 }
nuclear@0 66 }
nuclear@54 67 dbg_set_primary_rays(prim_rays); // give them to the debug renderer
nuclear@0 68
nuclear@2 69 /* setup opencl */
nuclear@3 70 prog = new CLProgram("render");
nuclear@54 71 if(!prog->load("src/rt.cl")) {
nuclear@8 72 return false;
nuclear@0 73 }
nuclear@0 74
nuclear@24 75 if(!(faces = (Face*)scn->get_face_buffer())) {
nuclear@13 76 fprintf(stderr, "failed to create face buffer\n");
nuclear@13 77 return false;
nuclear@13 78 }
nuclear@13 79
nuclear@28 80 const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
nuclear@28 81 if(!kdbuf) {
nuclear@28 82 fprintf(stderr, "failed to create kdtree buffer\n");
nuclear@28 83 return false;
nuclear@28 84 }
nuclear@43 85
nuclear@43 86 int kdimg_xsz, kdimg_ysz;
nuclear@43 87 float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
nuclear@28 88
nuclear@3 89 /* setup argument buffers */
nuclear@41 90 #ifdef CLGL_INTEROP
nuclear@39 91 prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex);
nuclear@41 92 #else
nuclear@41 93 prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz);
nuclear@41 94 #endif
nuclear@12 95 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
John@14 96 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces);
John@14 97 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials());
nuclear@54 98 prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, scn->get_num_lights() * sizeof(Light), scn->get_lights());
nuclear@12 99 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
nuclear@12 100 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
nuclear@12 101 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
nuclear@43 102 //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
nuclear@43 103 prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
nuclear@43 104
nuclear@43 105 delete [] kdimg_pixels;
nuclear@43 106
nuclear@12 107
John@14 108 if(prog->get_num_args() < NUM_KERNEL_ARGS) {
John@14 109 return false;
John@14 110 }
John@14 111
nuclear@45 112 const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
nuclear@45 113 if(!prog->build(opt)) {
nuclear@16 114 return false;
nuclear@16 115 }
nuclear@16 116
nuclear@54 117 //delete [] prim_rays; now dbg_renderer handles them
nuclear@2 118
nuclear@3 119 global_size = xsz * ysz;
nuclear@54 120
nuclear@54 121
nuclear@54 122 init_dbg_renderer(xsz, ysz, scn, tex);
nuclear@3 123 return true;
nuclear@3 124 }
nuclear@3 125
nuclear@3 126 void destroy_renderer()
nuclear@3 127 {
nuclear@3 128 delete prog;
nuclear@43 129
nuclear@54 130 destroy_dbg_renderer();
nuclear@54 131
nuclear@54 132 if(num_timing_samples) {
nuclear@54 133 printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);
nuclear@54 134 }
nuclear@3 135 }
nuclear@3 136
nuclear@3 137 bool render()
nuclear@3 138 {
nuclear@58 139 long tm0 = get_msec();
nuclear@39 140
nuclear@58 141 // initialize render-stats
nuclear@58 142 memset(&rstat, 0, sizeof rstat);
nuclear@58 143 rstat.min_aabb_tests = rstat.min_triangle_tests = INT_MAX;
nuclear@58 144 rstat.max_aabb_tests = rstat.max_triangle_tests = 0;
nuclear@32 145
nuclear@40 146 #ifdef CLGL_INTEROP
nuclear@39 147 cl_event ev;
nuclear@39 148 CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
nuclear@39 149
nuclear@39 150 if(!acquire_gl_object(texbuf, &ev)) {
nuclear@39 151 return false;
nuclear@39 152 }
nuclear@39 153
nuclear@39 154 // make sure that we will wait for the acquire to finish before running
nuclear@39 155 prog->set_wait_event(ev);
nuclear@40 156 #endif
nuclear@39 157
nuclear@3 158 if(!prog->run(1, global_size)) {
nuclear@3 159 return false;
nuclear@0 160 }
John@15 161
nuclear@40 162 #ifdef CLGL_INTEROP
nuclear@39 163 if(!release_gl_object(texbuf, &ev)) {
nuclear@39 164 return false;
nuclear@39 165 }
nuclear@39 166 clWaitForEvents(1, &ev);
nuclear@40 167 #endif
nuclear@39 168
nuclear@40 169 #ifndef CLGL_INTEROP
nuclear@40 170 /* if we don't compile in CL/GL interoperability support, we need
nuclear@40 171 * to copy the output buffer to the OpenGL texture used to displaying
nuclear@40 172 * the image.
nuclear@40 173 */
nuclear@13 174 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER);
nuclear@12 175 void *fb = map_mem_buffer(mbuf, MAP_RD);
nuclear@13 176 if(!fb) {
nuclear@13 177 fprintf(stderr, "FAILED\n");
nuclear@13 178 return false;
nuclear@13 179 }
nuclear@13 180
nuclear@12 181 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
nuclear@2 182 unmap_mem_buffer(mbuf);
nuclear@40 183 #endif
nuclear@32 184
nuclear@60 185 finish_opencl();
nuclear@60 186
nuclear@58 187 rstat.render_time = get_msec() - tm0;
nuclear@60 188 printf("FOO: %ld msec\n", rstat.render_time);
nuclear@58 189
nuclear@58 190 timing_sample_sum += rstat.render_time;
nuclear@43 191 num_timing_samples++;
nuclear@43 192
nuclear@3 193 return true;
nuclear@0 194 }
nuclear@2 195
nuclear@12 196
nuclear@12 197 void set_xform(float *matrix, float *invtrans)
nuclear@12 198 {
nuclear@12 199 CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
nuclear@12 200 CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
nuclear@12 201 assert(mbuf_xform && mbuf_invtrans);
nuclear@12 202
nuclear@12 203 float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
nuclear@12 204 memcpy(mem, matrix, 16 * sizeof *mem);
nuclear@12 205 unmap_mem_buffer(mbuf_xform);
nuclear@12 206
nuclear@12 207 mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
nuclear@12 208 memcpy(mem, invtrans, 16 * sizeof *mem);
nuclear@12 209 unmap_mem_buffer(mbuf_invtrans);
nuclear@8 210 }
nuclear@8 211
nuclear@54 212
nuclear@54 213 const RendInfo *get_render_info()
nuclear@54 214 {
nuclear@54 215 return &rinf;
nuclear@54 216 }
nuclear@54 217
nuclear@55 218 const RenderStats *get_render_stats()
nuclear@55 219 {
nuclear@55 220 return &rstat;
nuclear@55 221 }
nuclear@55 222
nuclear@55 223 void print_render_stats(FILE *fp)
nuclear@55 224 {
nuclear@55 225 fprintf(fp, "-- render stats --\n");
nuclear@55 226 fprintf(fp, "> timing\n");
nuclear@55 227 fprintf(fp, " render time (msec): %lu\n", rstat.render_time);
nuclear@55 228 fprintf(fp, " tex update time (msec): %lu\n", rstat.tex_update_time);
nuclear@55 229 fprintf(fp, "> counters\n");
nuclear@55 230 fprintf(fp, " AABB tests: %d\n", rstat.aabb_tests);
nuclear@55 231 fprintf(fp, " AABB tests per ray (min/max/avg): %d/%d/%f\n",
nuclear@55 232 rstat.min_aabb_tests, rstat.max_aabb_tests, rstat.avg_aabb_tests);
nuclear@55 233 fprintf(fp, " triangle tests: %d\n", rstat.triangle_tests);
nuclear@55 234 fprintf(fp, " triangle tests per ray (min/max/avg): %d/%d/%f\n",
nuclear@55 235 rstat.min_triangle_tests, rstat.max_triangle_tests, rstat.avg_triangle_tests);
nuclear@55 236 fprintf(fp, " rays cast: %dp %dr %ds (sum: %d)\n", rstat.prim_rays,
nuclear@55 237 rstat.refl_rays, rstat.shadow_rays, rstat.rays_cast);
nuclear@55 238 fprintf(fp, " rays per second: %d\n", rstat.rays_per_sec);
nuclear@55 239 fprintf(fp, " BRDF evaluations: %d\n", rstat.brdf_evals);
nuclear@55 240 fputc('\n', fp);
nuclear@55 241 }
nuclear@55 242
nuclear@47 243 void set_render_option(int opt, bool val)
nuclear@47 244 {
nuclear@47 245 switch(opt) {
nuclear@47 246 case ROPT_ITER:
nuclear@47 247 case ROPT_REFL:
nuclear@47 248 rinf.max_iter = val ? saved_iter_val : 0;
nuclear@47 249 break;
nuclear@47 250
nuclear@47 251 case ROPT_SHAD:
nuclear@47 252 rinf.cast_shadows = val;
nuclear@47 253 break;
nuclear@47 254
nuclear@47 255 default:
nuclear@47 256 return;
nuclear@47 257 }
nuclear@47 258
nuclear@47 259 update_render_info();
nuclear@47 260 }
nuclear@47 261
nuclear@47 262 void set_render_option(int opt, int val)
nuclear@47 263 {
nuclear@47 264 switch(opt) {
nuclear@47 265 case ROPT_ITER:
nuclear@47 266 rinf.max_iter = saved_iter_val = val;
nuclear@47 267 break;
nuclear@47 268
nuclear@47 269 case ROPT_SHAD:
nuclear@47 270 rinf.cast_shadows = val;
nuclear@47 271 break;
nuclear@47 272
nuclear@47 273 case ROPT_REFL:
nuclear@47 274 rinf.max_iter = val ? saved_iter_val : 0;
nuclear@47 275 break;
nuclear@47 276
nuclear@47 277 default:
nuclear@47 278 return;
nuclear@47 279 }
nuclear@47 280
nuclear@47 281 update_render_info();
nuclear@47 282 }
nuclear@47 283
// Float flavour of set_render_option: the value is truncated to int and
// handled by the (int, int) overload.
void set_render_option(int opt, float val)
{
	const int truncated = (int)val;
	set_render_option(opt, truncated);
}
nuclear@47 288
nuclear@47 289 bool get_render_option_bool(int opt)
nuclear@47 290 {
nuclear@47 291 switch(opt) {
nuclear@47 292 case ROPT_ITER:
nuclear@47 293 return rinf.max_iter;
nuclear@47 294 case ROPT_SHAD:
nuclear@47 295 return rinf.cast_shadows;
nuclear@47 296 case ROPT_REFL:
nuclear@47 297 return rinf.max_iter == saved_iter_val;
nuclear@47 298 default:
nuclear@47 299 break;
nuclear@47 300 }
nuclear@47 301 return false;
nuclear@47 302 }
nuclear@47 303
nuclear@47 304 int get_render_option_int(int opt)
nuclear@47 305 {
nuclear@47 306 switch(opt) {
nuclear@47 307 case ROPT_ITER:
nuclear@47 308 return rinf.max_iter;
nuclear@47 309 case ROPT_SHAD:
nuclear@47 310 return rinf.cast_shadows ? 1 : 0;
nuclear@47 311 case ROPT_REFL:
nuclear@47 312 return rinf.max_iter == saved_iter_val ? 1 : 0;
nuclear@47 313 default:
nuclear@47 314 break;
nuclear@47 315 }
nuclear@47 316 return -1;
nuclear@47 317 }
nuclear@47 318
nuclear@47 319 float get_render_option_float(int opt)
nuclear@47 320 {
nuclear@47 321 return (float)get_render_option_int(opt);
nuclear@47 322 }
nuclear@47 323
nuclear@47 324 static void update_render_info()
nuclear@47 325 {
nuclear@47 326 if(!prog) {
nuclear@47 327 return;
nuclear@47 328 }
nuclear@47 329
nuclear@47 330 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_RENDER_INFO);
nuclear@47 331 assert(mbuf);
nuclear@47 332
nuclear@47 333 RendInfo *rinf_ptr = (RendInfo*)map_mem_buffer(mbuf, MAP_WR);
nuclear@47 334 *rinf_ptr = rinf;
nuclear@47 335 unmap_mem_buffer(mbuf);
nuclear@47 336 }
nuclear@47 337
nuclear@3 338 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
nuclear@2 339 {
nuclear@2 340 float vfov = M_PI * vfov_deg / 180.0;
nuclear@2 341 float aspect = (float)w / (float)h;
nuclear@2 342
nuclear@2 343 float ysz = 2.0;
nuclear@2 344 float xsz = aspect * ysz;
nuclear@2 345
nuclear@2 346 float px = ((float)x / (float)w) * xsz - xsz / 2.0;
nuclear@2 347 float py = 1.0 - ((float)y / (float)h) * ysz;
nuclear@2 348 float pz = 1.0 / tan(0.5 * vfov);
nuclear@2 349
nuclear@43 350 float mag = sqrt(px * px + py * py + pz * pz);
nuclear@43 351
nuclear@45 352 px = px * RAY_MAG / mag;
nuclear@45 353 py = py * RAY_MAG / mag;
nuclear@45 354 pz = pz * RAY_MAG / mag;
nuclear@2 355
nuclear@18 356 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
nuclear@2 357 return ray;
nuclear@2 358 }
nuclear@43 359
nuclear@54 360 #define MIN(a, b) ((a) < (b) ? (a) : (b))
nuclear@54 361
nuclear@43 362 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
nuclear@43 363 {
nuclear@45 364 int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
nuclear@45 365 int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
nuclear@45 366 int xsz = KDIMG_NODE_WIDTH * columns;
nuclear@43 367
nuclear@43 368 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
nuclear@43 369
nuclear@43 370 float *img = new float[4 * xsz * ysz];
nuclear@43 371 memset(img, 0, 4 * xsz * ysz * sizeof *img);
nuclear@43 372
nuclear@43 373 for(int i=0; i<num_nodes; i++) {
nuclear@45 374 int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
nuclear@45 375 int y = i % KDIMG_MAX_HEIGHT;
nuclear@45 376
nuclear@45 377 float *ptr = img + (y * xsz + x) * 4;
nuclear@43 378
nuclear@43 379 *ptr++ = kdtree[i].aabb.min[0];
nuclear@43 380 *ptr++ = kdtree[i].aabb.min[1];
nuclear@43 381 *ptr++ = kdtree[i].aabb.min[2];
nuclear@43 382 *ptr++ = 0.0;
nuclear@43 383
nuclear@43 384 *ptr++ = kdtree[i].aabb.max[0];
nuclear@43 385 *ptr++ = kdtree[i].aabb.max[1];
nuclear@43 386 *ptr++ = kdtree[i].aabb.max[2];
nuclear@43 387 *ptr++ = 0.0;
nuclear@43 388
nuclear@43 389 for(int j=0; j<MAX_NODE_FACES; j++) {
nuclear@43 390 *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;
nuclear@43 391 }
nuclear@43 392
nuclear@43 393 *ptr++ = (float)kdtree[i].num_faces;
nuclear@43 394 *ptr++ = (float)kdtree[i].left;
nuclear@43 395 *ptr++ = (float)kdtree[i].right;
nuclear@43 396 *ptr++ = 0.0;
nuclear@43 397 }
nuclear@43 398
nuclear@43 399 if(xsz_ret) *xsz_ret = xsz;
nuclear@43 400 if(ysz_ret) *ysz_ret = ysz;
nuclear@43 401 return img;
nuclear@43 402 }