/*
 * NOTE(review): this text looks like revision-annotate output rather than
 * compilable source. Every original source line carries an "author@revision:"
 * prefix and many source lines are run together on each physical line.
 * Some text appears to have been lost in extraction: the names of the first
 * four #include directives, the span between "for(int i=0; i" and
 * "load("src/rt.cl")" inside init_renderer, and everything after
 * "for(int i=0; i" in create_kdimage. Recover the real file from the
 * repository before changing any code here.
 *
 * Overview of the definitions visible below:
 *
 * KARG_ enum          Indices of the kernel arguments; NUM_KERNEL_ARGS is the
 *                     count that init_renderer compares against
 *                     prog->get_num_args().
 *
 * init_renderer       Fills the file-static RendInfo (zero ambient, image size,
 *                     face and light counts from the Scene, max_iter and
 *                     saved_iter_val set to 6, shadows on), allocates
 *                     xsz * ysz primary rays, fetches the face and kd-tree
 *                     buffers from the Scene (prints to stderr and returns
 *                     false when either is null), packs the kd-tree with
 *                     create_kdimage, binds every kernel argument, frees the
 *                     kd-tree image, builds the program and finally calls
 *                     init_dbg_renderer. Returns false on any failed step.
 *                     prim_rays is intentionally not freed here (see the
 *                     original comment next to the commented-out delete).
 *
 * destroy_renderer    Deletes prog, calls destroy_dbg_renderer and, when at
 *                     least one timing sample exists, prints the mean render
 *                     time.
 *
 * render              Runs the kernel over global_size work items. With
 *                     CLGL_INTEROP defined the GL object is acquired before and
 *                     released after the run; otherwise the framebuffer
 *                     argument is mapped for reading and uploaded with
 *                     glTexSubImage2D. Elapsed milliseconds are accumulated
 *                     into timing_sample_sum / num_timing_samples and printed
 *                     when dbg_frame_time is set.
 *
 * set_xform           Asserts that both matrix buffers exist, then copies
 *                     16 floats from each argument into the KARG_XFORM and
 *                     KARG_INVTRANS_XFORM buffers.
 *
 * get_render_info,
 * get_render_stats    Return pointers to the file-static rinf and rstat.
 *
 * print_render_stats  Writes the fields of rstat to the given FILE.
 *                     NOTE(review): the field types are not visible here;
 *                     confirm they match the %lu / %d / %f conversions used.
 *
 * set_render_option   bool overload: ROPT_ITER and ROPT_REFL both switch
 *                     max_iter between saved_iter_val and 0; ROPT_SHAD sets
 *                     cast_shadows. int overload: ROPT_ITER stores the value in
 *                     both max_iter and saved_iter_val; ROPT_SHAD and ROPT_REFL
 *                     treat the value as a flag. float overload: casts to int
 *                     and forwards. Unknown options return without calling
 *                     update_render_info.
 *
 * get_render_option_  bool: false for unknown options. int: -1 for unknown
 * bool / int / float  options. float: the int result cast to float. ROPT_REFL
 *                     reports whether max_iter equals saved_iter_val.
 *
 * update_render_info  Does nothing when prog is null; otherwise maps the
 *                     KARG_RENDER_INFO buffer for writing, copies rinf into it
 *                     and unmaps it.
 *                     NOTE(review): render checks the pointer returned by
 *                     map_mem_buffer, but set_xform and update_render_info
 *                     use it unchecked; confirm whether mapping can fail there.
 *
 * get_primary_ray     Builds a ray with origin {0, 0, 0, 1} whose direction is
 *                     derived from the pixel position, the aspect ratio and the
 *                     vertical field of view in degrees, rescaled to length
 *                     RAY_MAG, with the z component negated.
 *
 * create_kdimage      Computes the image size from num_nodes,
 *                     KDIMG_MAX_HEIGHT and KDIMG_NODE_WIDTH, prints it, and
 *                     allocates a zero-filled array of 4 * xsz * ysz floats.
 *                     The remainder of the function is cut off in this view.
 */
nuclear@0: #include nuclear@8: #include nuclear@2: #include nuclear@0: #include nuclear@47: #include "rt.h" John@14: #include "ogl.h" nuclear@0: #include "ocl.h" nuclear@22: #include "scene.h" nuclear@32: #include "timer.h" nuclear@45: #include "common.h" nuclear@0: nuclear@12: // kernel arguments nuclear@12: enum { nuclear@12: KARG_FRAMEBUFFER, nuclear@12: KARG_RENDER_INFO, nuclear@12: KARG_FACES, nuclear@12: KARG_MATLIB, nuclear@12: KARG_LIGHTS, nuclear@12: KARG_PRIM_RAYS, nuclear@12: KARG_XFORM, John@14: KARG_INVTRANS_XFORM, nuclear@28: KARG_KDTREE, John@14: John@14: NUM_KERNEL_ARGS nuclear@12: }; John@11: nuclear@47: static void update_render_info(); nuclear@3: static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg); nuclear@43: static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret); nuclear@3: nuclear@13: static Face *faces; nuclear@3: static Ray *prim_rays; nuclear@3: static CLProgram *prog; nuclear@3: static int global_size; nuclear@3: nuclear@7: nuclear@4: static RendInfo rinf; nuclear@55: static RenderStats rstat; nuclear@47: static int saved_iter_val; nuclear@4: nuclear@43: static long timing_sample_sum; nuclear@43: static long num_timing_samples; nuclear@43: John@50: extern bool dbg_frame_time; John@50: nuclear@4: nuclear@39: bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex) nuclear@0: { nuclear@4: // render info nuclear@22: rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0; nuclear@16: rinf.ambient[3] = 0.0; nuclear@16: nuclear@4: rinf.xsz = xsz; nuclear@4: rinf.ysz = ysz; nuclear@13: rinf.num_faces = scn->get_num_faces(); nuclear@54: rinf.num_lights = scn->get_num_lights(); nuclear@47: rinf.max_iter = saved_iter_val = 6; nuclear@47: rinf.cast_shadows = true; nuclear@4: nuclear@3: /* calculate primary rays */ nuclear@3: prim_rays = new Ray[xsz * ysz]; nuclear@2: nuclear@2: for(int i=0; iload("src/rt.cl")) { nuclear@8: return false; nuclear@0: } nuclear@0: nuclear@24: 
if(!(faces = (Face*)scn->get_face_buffer())) { nuclear@13: fprintf(stderr, "failed to create face buffer\n"); nuclear@13: return false; nuclear@13: } nuclear@13: nuclear@28: const KDNodeGPU *kdbuf = scn->get_kdtree_buffer(); nuclear@28: if(!kdbuf) { nuclear@28: fprintf(stderr, "failed to create kdtree buffer\n"); nuclear@28: return false; nuclear@28: } nuclear@43: nuclear@43: int kdimg_xsz, kdimg_ysz; nuclear@43: float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz); nuclear@28: nuclear@3: /* setup argument buffers */ nuclear@41: #ifdef CLGL_INTEROP nuclear@39: prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex); nuclear@41: #else nuclear@41: prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz); nuclear@41: #endif nuclear@12: prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); John@14: prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces); John@14: prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials()); nuclear@54: prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, scn->get_num_lights() * sizeof(Light), scn->get_lights()); nuclear@12: prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays); nuclear@12: prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float)); nuclear@12: prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float)); nuclear@43: //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf); nuclear@43: prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels); nuclear@43: nuclear@43: delete [] kdimg_pixels; nuclear@43: nuclear@12: John@14: if(prog->get_num_args() < NUM_KERNEL_ARGS) { John@14: return false; John@14: } John@14: nuclear@45: const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; nuclear@45: if(!prog->build(opt)) { nuclear@16: return false; nuclear@16: } nuclear@16: nuclear@54: 
//delete [] prim_rays; now dbg_renderer handles them nuclear@2: nuclear@3: global_size = xsz * ysz; nuclear@54: nuclear@54: nuclear@54: init_dbg_renderer(xsz, ysz, scn, tex); nuclear@3: return true; nuclear@3: } nuclear@3: nuclear@3: void destroy_renderer() nuclear@3: { nuclear@3: delete prog; nuclear@43: nuclear@54: destroy_dbg_renderer(); nuclear@54: nuclear@54: if(num_timing_samples) { nuclear@54: printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples); nuclear@54: } nuclear@3: } nuclear@3: nuclear@3: bool render() nuclear@3: { nuclear@39: // XXX do we need to call glFinish ? nuclear@39: nuclear@32: long tm0 = get_msec(); nuclear@32: nuclear@40: #ifdef CLGL_INTEROP nuclear@39: cl_event ev; nuclear@39: CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); nuclear@39: nuclear@39: if(!acquire_gl_object(texbuf, &ev)) { nuclear@39: return false; nuclear@39: } nuclear@39: nuclear@39: // make sure that we will wait for the acquire to finish before running nuclear@39: prog->set_wait_event(ev); nuclear@40: #endif nuclear@39: nuclear@3: if(!prog->run(1, global_size)) { nuclear@3: return false; nuclear@0: } John@15: nuclear@40: #ifdef CLGL_INTEROP nuclear@39: if(!release_gl_object(texbuf, &ev)) { nuclear@39: return false; nuclear@39: } nuclear@39: clWaitForEvents(1, &ev); nuclear@40: #endif nuclear@39: nuclear@40: #ifndef CLGL_INTEROP nuclear@40: /* if we don't compile in CL/GL interoperability support, we need nuclear@40: * to copy the output buffer to the OpenGL texture used for displaying nuclear@40: * the image. 
nuclear@40: */ nuclear@13: CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); nuclear@12: void *fb = map_mem_buffer(mbuf, MAP_RD); nuclear@13: if(!fb) { nuclear@13: fprintf(stderr, "FAILED\n"); nuclear@13: return false; nuclear@13: } nuclear@13: nuclear@12: glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb); nuclear@2: unmap_mem_buffer(mbuf); nuclear@40: #endif nuclear@32: nuclear@43: long msec = get_msec() - tm0; nuclear@43: timing_sample_sum += msec; nuclear@43: num_timing_samples++; nuclear@43: John@50: if(dbg_frame_time) { John@50: printf("rendered in %ld msec\n", msec); John@50: } nuclear@3: return true; nuclear@0: } nuclear@2: nuclear@12: nuclear@12: void set_xform(float *matrix, float *invtrans) nuclear@12: { nuclear@12: CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM); nuclear@12: CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM); nuclear@12: assert(mbuf_xform && mbuf_invtrans); nuclear@12: nuclear@12: float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR); nuclear@12: memcpy(mem, matrix, 16 * sizeof *mem); nuclear@12: unmap_mem_buffer(mbuf_xform); nuclear@12: nuclear@12: mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR); nuclear@12: memcpy(mem, invtrans, 16 * sizeof *mem); nuclear@12: unmap_mem_buffer(mbuf_invtrans); nuclear@8: } nuclear@8: nuclear@54: nuclear@54: const RendInfo *get_render_info() nuclear@54: { nuclear@54: return &rinf; nuclear@54: } nuclear@54: nuclear@55: const RenderStats *get_render_stats() nuclear@55: { nuclear@55: return &rstat; nuclear@55: } nuclear@55: nuclear@55: void print_render_stats(FILE *fp) nuclear@55: { nuclear@55: fprintf(fp, "-- render stats --\n"); nuclear@55: fprintf(fp, "> timing\n"); nuclear@55: fprintf(fp, " render time (msec): %lu\n", rstat.render_time); nuclear@55: fprintf(fp, " tex update time (msec): %lu\n", rstat.tex_update_time); nuclear@55: fprintf(fp, "> counters\n"); nuclear@55: fprintf(fp, " AABB tests: %d\n", rstat.aabb_tests); 
nuclear@55: fprintf(fp, " AABB tests per ray (min/max/avg): %d/%d/%f\n", nuclear@55: rstat.min_aabb_tests, rstat.max_aabb_tests, rstat.avg_aabb_tests); nuclear@55: fprintf(fp, " triangle tests: %d\n", rstat.triangle_tests); nuclear@55: fprintf(fp, " triangle tests per ray (min/max/avg): %d/%d/%f\n", nuclear@55: rstat.min_triangle_tests, rstat.max_triangle_tests, rstat.avg_triangle_tests); nuclear@55: fprintf(fp, " rays cast: %dp %dr %ds (sum: %d)\n", rstat.prim_rays, nuclear@55: rstat.refl_rays, rstat.shadow_rays, rstat.rays_cast); nuclear@55: fprintf(fp, " rays per second: %d\n", rstat.rays_per_sec); nuclear@55: fprintf(fp, " BRDF evaluations: %d\n", rstat.brdf_evals); nuclear@55: fputc('\n', fp); nuclear@55: } nuclear@55: nuclear@47: void set_render_option(int opt, bool val) nuclear@47: { nuclear@47: switch(opt) { nuclear@47: case ROPT_ITER: nuclear@47: case ROPT_REFL: nuclear@47: rinf.max_iter = val ? saved_iter_val : 0; nuclear@47: break; nuclear@47: nuclear@47: case ROPT_SHAD: nuclear@47: rinf.cast_shadows = val; nuclear@47: break; nuclear@47: nuclear@47: default: nuclear@47: return; nuclear@47: } nuclear@47: nuclear@47: update_render_info(); nuclear@47: } nuclear@47: nuclear@47: void set_render_option(int opt, int val) nuclear@47: { nuclear@47: switch(opt) { nuclear@47: case ROPT_ITER: nuclear@47: rinf.max_iter = saved_iter_val = val; nuclear@47: break; nuclear@47: nuclear@47: case ROPT_SHAD: nuclear@47: rinf.cast_shadows = val; nuclear@47: break; nuclear@47: nuclear@47: case ROPT_REFL: nuclear@47: rinf.max_iter = val ? 
saved_iter_val : 0; nuclear@47: break; nuclear@47: nuclear@47: default: nuclear@47: return; nuclear@47: } nuclear@47: nuclear@47: update_render_info(); nuclear@47: } nuclear@47: nuclear@47: void set_render_option(int opt, float val) nuclear@47: { nuclear@47: set_render_option(opt, (int)val); nuclear@47: } nuclear@47: nuclear@47: bool get_render_option_bool(int opt) nuclear@47: { nuclear@47: switch(opt) { nuclear@47: case ROPT_ITER: nuclear@47: return rinf.max_iter; nuclear@47: case ROPT_SHAD: nuclear@47: return rinf.cast_shadows; nuclear@47: case ROPT_REFL: nuclear@47: return rinf.max_iter == saved_iter_val; nuclear@47: default: nuclear@47: break; nuclear@47: } nuclear@47: return false; nuclear@47: } nuclear@47: nuclear@47: int get_render_option_int(int opt) nuclear@47: { nuclear@47: switch(opt) { nuclear@47: case ROPT_ITER: nuclear@47: return rinf.max_iter; nuclear@47: case ROPT_SHAD: nuclear@47: return rinf.cast_shadows ? 1 : 0; nuclear@47: case ROPT_REFL: nuclear@47: return rinf.max_iter == saved_iter_val ? 
1 : 0; nuclear@47: default: nuclear@47: break; nuclear@47: } nuclear@47: return -1; nuclear@47: } nuclear@47: nuclear@47: float get_render_option_float(int opt) nuclear@47: { nuclear@47: return (float)get_render_option_int(opt); nuclear@47: } nuclear@47: nuclear@47: static void update_render_info() nuclear@47: { nuclear@47: if(!prog) { nuclear@47: return; nuclear@47: } nuclear@47: nuclear@47: CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_RENDER_INFO); nuclear@47: assert(mbuf); nuclear@47: nuclear@47: RendInfo *rinf_ptr = (RendInfo*)map_mem_buffer(mbuf, MAP_WR); nuclear@47: *rinf_ptr = rinf; nuclear@47: unmap_mem_buffer(mbuf); nuclear@47: } nuclear@47: nuclear@3: static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg) nuclear@2: { nuclear@2: float vfov = M_PI * vfov_deg / 180.0; nuclear@2: float aspect = (float)w / (float)h; nuclear@2: nuclear@2: float ysz = 2.0; nuclear@2: float xsz = aspect * ysz; nuclear@2: nuclear@2: float px = ((float)x / (float)w) * xsz - xsz / 2.0; nuclear@2: float py = 1.0 - ((float)y / (float)h) * ysz; nuclear@2: float pz = 1.0 / tan(0.5 * vfov); nuclear@2: nuclear@43: float mag = sqrt(px * px + py * py + pz * pz); nuclear@43: nuclear@45: px = px * RAY_MAG / mag; nuclear@45: py = py * RAY_MAG / mag; nuclear@45: pz = pz * RAY_MAG / mag; nuclear@2: nuclear@18: Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}}; nuclear@2: return ray; nuclear@2: } nuclear@43: nuclear@54: #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) nuclear@54: nuclear@43: static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret) nuclear@43: { nuclear@45: int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT); nuclear@45: int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1; nuclear@45: int xsz = KDIMG_NODE_WIDTH * columns; nuclear@43: nuclear@43: printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes); nuclear@43: nuclear@43: float *img = new float[4 * xsz * ysz]; nuclear@43: memset(img, 0, 4 * xsz * ysz * sizeof *img); nuclear@43: nuclear@43: for(int i=0; i