/*
 * NOTE(review): this text is not compilable as it stands. It is a
 * revision-annotated listing (every original source line carries an
 * "author@revision:" prefix) collapsed onto a few very long lines, and some
 * spans look like they were lost in extraction -- apparently whatever stood
 * between a '<' and the next '>': the four leading #include directives have
 * no header name, and for-loops break off right after "for(int i=0; i" in
 * init_renderer, twice in dbg_render_gl, and in create_kdimage (which is also
 * cut off at the end of this text). The code below is left untouched; recover
 * the missing spans from the original file before changing any logic.
 *
 * Language: C++ rather than C (new[] / delete[], bool, member calls through
 * prog-> and scn->).
 *
 * What is visible here, in order:
 *
 *  - KARG_* enum: argument indices passed to the prog->set_arg_*() and
 *    prog->get_arg_buffer() calls. NUM_KERNEL_ARGS is the count that
 *    init_renderer compares against prog->get_num_args().
 *
 *  - RendInfo, Ray, Light: plain structs. The RendInfo instance `rinf` and
 *    the `lightlist` array are handed to set_arg_buffer() by address and
 *    sizeof. Presumably their layout has to match the declarations in rt.cl
 *    -- TODO confirm against the kernel source.
 *
 *  - init_renderer(xsz, ysz, scn, tex): fills `rinf` (ambient = 0,
 *    max_iter = 6, face/light counts, kd-tree depth), allocates one Ray per
 *    pixel (xsz * ysz), obtains the face and kd-tree buffers from the scene,
 *    converts the kd-tree into a float image with create_kdimage(), sets all
 *    kernel arguments and builds the program. Returns false on every failure
 *    path and true on success, after storing xsz * ysz in global_size.
 *    `tex` is only used when CLGL_INTEROP is defined.
 *    NOTE(review): prim_rays is released with delete[] only after a
 *    successful build; the earlier `return false` paths leave it allocated.
 *
 *  - destroy_renderer(): deletes the program object and prints
 *    timing_sample_sum / num_timing_samples.
 *    NOTE(review): num_timing_samples is only incremented at the end of a
 *    successful render(), so calling this before any frame completed divides
 *    by zero.
 *
 *  - render(): calls prog->run(1, global_size), then adds the difference of
 *    two get_msec() readings to the timing totals and prints it. With
 *    CLGL_INTEROP the framebuffer argument is acquired before the run and
 *    released afterwards; without it the framebuffer is mapped for reading
 *    and uploaded with glTexSubImage2D as rinf.xsz x rinf.ysz GL_RGBA /
 *    GL_FLOAT pixels.
 *
 *  - dbg_set_gl_material(mat): applies mat->kd, mat->ks and mat->spow as the
 *    OpenGL material; a null `mat` selects a built-in grey default, and the
 *    shininess passed to GL is limited to 128 through MIN().
 *
 *  - dbg_render_gl(scn, show_tree, show_obj): debug drawing bracketed by
 *    glPushAttrib / glPopAttrib and ending in an assert on glGetError().
 *    Most of its body is among the lost spans.
 *
 *  - set_xform(matrix, invtrans): maps the KARG_XFORM and KARG_INVTRANS_XFORM
 *    buffers for writing and copies 16 floats into each.
 *    NOTE(review): the map_mem_buffer() results are used without a null
 *    check here, although render() treats a null result as a failure.
 *
 *  - get_primary_ray(x, y, w, h, vfov_deg): returns a Ray with origin
 *    (0, 0, 0, 1) whose direction goes through pixel (x, y) of a w x h image,
 *    is scaled to length RAY_MAG and points along negative Z (-pz).
 *
 *  - create_kdimage(kdtree, num_nodes, xsz_ret, ysz_ret): allocates a
 *    zero-filled float array of 4 * xsz * ysz elements, where
 *    ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT) and xsz = KDIMG_NODE_WIDTH times
 *    the number of columns needed. init_renderer releases the result with
 *    delete[]. The loop that fills the image is cut off in this text.
 */
nuclear@0: #include nuclear@8: #include nuclear@2: #include nuclear@0: #include John@14: #include "ogl.h" nuclear@0: #include "ocl.h" nuclear@22: #include "scene.h" nuclear@32: #include "timer.h" nuclear@45: #include "common.h" nuclear@0: nuclear@12: // kernel arguments nuclear@12: enum { nuclear@12: KARG_FRAMEBUFFER, nuclear@12: KARG_RENDER_INFO, nuclear@12: KARG_FACES, nuclear@12: KARG_MATLIB, nuclear@12: KARG_LIGHTS, nuclear@12: KARG_PRIM_RAYS, nuclear@12: KARG_XFORM, John@14: KARG_INVTRANS_XFORM, nuclear@28: KARG_KDTREE, John@14: John@14: NUM_KERNEL_ARGS nuclear@12: }; John@11: nuclear@2: struct RendInfo { nuclear@22: float ambient[4]; nuclear@2: int xsz, ysz; nuclear@9: int num_faces, num_lights; nuclear@2: int max_iter; nuclear@28: int kd_depth; nuclear@12: }; nuclear@2: nuclear@1: struct Ray { nuclear@8: float origin[4], dir[4]; nuclear@12: }; nuclear@1: nuclear@3: struct Light { nuclear@8: float pos[4], color[4]; nuclear@12: }; nuclear@1: nuclear@3: static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg); nuclear@43: static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret); nuclear@3: nuclear@13: static Face *faces; nuclear@3: static Ray *prim_rays; nuclear@3: static CLProgram *prog; nuclear@3: static int global_size; nuclear@3: nuclear@4: static Light lightlist[] = { nuclear@22: {{-8, 15, 18, 0}, {1, 1, 1, 1}} nuclear@4: }; nuclear@4: nuclear@7: nuclear@4: static RendInfo rinf; nuclear@4: nuclear@43: static long timing_sample_sum; nuclear@43: static long num_timing_samples; nuclear@43: nuclear@4: nuclear@39: bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex) nuclear@0: { nuclear@4: // render info nuclear@22: rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0; nuclear@16: rinf.ambient[3] = 0.0; nuclear@16: nuclear@4: rinf.xsz = xsz; nuclear@4: rinf.ysz = ysz; nuclear@13: rinf.num_faces = scn->get_num_faces(); nuclear@4: rinf.num_lights = sizeof lightlist / sizeof *lightlist; 
nuclear@4: rinf.max_iter = 6; nuclear@28: rinf.kd_depth = kdtree_depth(scn->kdtree); nuclear@4: nuclear@3: /* calculate primary rays */ nuclear@3: prim_rays = new Ray[xsz * ysz]; nuclear@2: nuclear@2: for(int i=0; iload("rt.cl")) { nuclear@8: return false; nuclear@0: } nuclear@0: nuclear@24: if(!(faces = (Face*)scn->get_face_buffer())) { nuclear@13: fprintf(stderr, "failed to create face buffer\n"); nuclear@13: return false; nuclear@13: } nuclear@13: nuclear@28: const KDNodeGPU *kdbuf = scn->get_kdtree_buffer(); nuclear@28: if(!kdbuf) { nuclear@28: fprintf(stderr, "failed to create kdtree buffer\n"); nuclear@28: return false; nuclear@28: } nuclear@43: nuclear@43: int kdimg_xsz, kdimg_ysz; nuclear@43: float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz); nuclear@28: nuclear@3: /* setup argument buffers */ nuclear@41: #ifdef CLGL_INTEROP nuclear@39: prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex); nuclear@41: #else nuclear@41: prog->set_arg_image(KARG_FRAMEBUFFER, ARG_WR, xsz, ysz); nuclear@41: #endif nuclear@12: prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); John@14: prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces); John@14: prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials()); nuclear@12: prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, sizeof lightlist, lightlist); nuclear@12: prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays); nuclear@12: prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float)); nuclear@12: prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float)); nuclear@43: //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf); nuclear@43: prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels); nuclear@43: nuclear@43: delete [] kdimg_pixels; nuclear@43: nuclear@12: John@14: if(prog->get_num_args() < 
NUM_KERNEL_ARGS) { John@14: return false; John@14: } John@14: nuclear@45: const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; nuclear@45: if(!prog->build(opt)) { nuclear@16: return false; nuclear@16: } nuclear@16: nuclear@12: delete [] prim_rays; nuclear@2: nuclear@3: global_size = xsz * ysz; nuclear@3: return true; nuclear@3: } nuclear@3: nuclear@3: void destroy_renderer() nuclear@3: { nuclear@3: delete prog; nuclear@43: nuclear@43: printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples); nuclear@3: } nuclear@3: nuclear@3: bool render() nuclear@3: { nuclear@39: // XXX do we need to call glFinish ? nuclear@39: nuclear@32: long tm0 = get_msec(); nuclear@32: nuclear@40: #ifdef CLGL_INTEROP nuclear@39: cl_event ev; nuclear@39: CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); nuclear@39: nuclear@39: if(!acquire_gl_object(texbuf, &ev)) { nuclear@39: return false; nuclear@39: } nuclear@39: nuclear@39: // make sure that we will wait for the acquire to finish before running nuclear@39: prog->set_wait_event(ev); nuclear@40: #endif nuclear@39: nuclear@3: if(!prog->run(1, global_size)) { nuclear@3: return false; nuclear@0: } John@15: nuclear@40: #ifdef CLGL_INTEROP nuclear@39: if(!release_gl_object(texbuf, &ev)) { nuclear@39: return false; nuclear@39: } nuclear@39: clWaitForEvents(1, &ev); nuclear@40: #endif nuclear@39: nuclear@40: #ifndef CLGL_INTEROP nuclear@40: /* if we don't compile in CL/GL interoperability support, we need nuclear@40: * to copy the output buffer to the OpenGL texture used to displaying nuclear@40: * the image. 
nuclear@40: */ nuclear@13: CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); nuclear@12: void *fb = map_mem_buffer(mbuf, MAP_RD); nuclear@13: if(!fb) { nuclear@13: fprintf(stderr, "FAILED\n"); nuclear@13: return false; nuclear@13: } nuclear@13: nuclear@12: glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb); nuclear@2: unmap_mem_buffer(mbuf); nuclear@40: #endif nuclear@32: nuclear@43: long msec = get_msec() - tm0; nuclear@43: timing_sample_sum += msec; nuclear@43: num_timing_samples++; nuclear@43: nuclear@43: printf("rendered in %ld msec\n", msec); nuclear@3: return true; nuclear@0: } nuclear@2: nuclear@27: #define MIN(a, b) ((a) < (b) ? (a) : (b)) nuclear@21: static void dbg_set_gl_material(Material *mat) nuclear@21: { nuclear@21: static Material def_mat = {{0.7, 0.7, 0.7, 1}, {0, 0, 0, 0}, 0, 0, 0}; nuclear@21: nuclear@21: if(!mat) mat = &def_mat; nuclear@21: nuclear@21: glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat->kd); nuclear@21: glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, mat->ks); nuclear@27: glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, MIN(mat->spow, 128.0f)); nuclear@21: } nuclear@21: nuclear@27: void dbg_render_gl(Scene *scn, bool show_tree, bool show_obj) nuclear@8: { nuclear@22: glPushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_LIGHTING_BIT); nuclear@8: nuclear@21: for(int i=0; iget_materials(); John@14: nuclear@27: int num_faces = scn->get_num_faces(); nuclear@27: int cur_mat = -1; nuclear@21: nuclear@27: for(int i=0; idraw_kdtree(); nuclear@12: } nuclear@12: nuclear@12: glPopMatrix(); nuclear@12: glPopAttrib(); nuclear@22: nuclear@22: assert(glGetError() == GL_NO_ERROR); nuclear@12: } nuclear@12: nuclear@12: void set_xform(float *matrix, float *invtrans) nuclear@12: { nuclear@12: CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM); nuclear@12: CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM); nuclear@12: assert(mbuf_xform && mbuf_invtrans); nuclear@12: nuclear@12: 
float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR); nuclear@12: memcpy(mem, matrix, 16 * sizeof *mem); nuclear@12: unmap_mem_buffer(mbuf_xform); nuclear@12: nuclear@12: mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR); nuclear@12: memcpy(mem, invtrans, 16 * sizeof *mem); nuclear@12: unmap_mem_buffer(mbuf_invtrans); nuclear@8: } nuclear@8: nuclear@3: static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg) nuclear@2: { nuclear@2: float vfov = M_PI * vfov_deg / 180.0; nuclear@2: float aspect = (float)w / (float)h; nuclear@2: nuclear@2: float ysz = 2.0; nuclear@2: float xsz = aspect * ysz; nuclear@2: nuclear@2: float px = ((float)x / (float)w) * xsz - xsz / 2.0; nuclear@2: float py = 1.0 - ((float)y / (float)h) * ysz; nuclear@2: float pz = 1.0 / tan(0.5 * vfov); nuclear@2: nuclear@43: float mag = sqrt(px * px + py * py + pz * pz); nuclear@43: nuclear@45: px = px * RAY_MAG / mag; nuclear@45: py = py * RAY_MAG / mag; nuclear@45: pz = pz * RAY_MAG / mag; nuclear@2: nuclear@18: Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}}; nuclear@2: return ray; nuclear@2: } nuclear@43: nuclear@43: static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret) nuclear@43: { nuclear@45: int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT); nuclear@45: int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1; nuclear@45: int xsz = KDIMG_NODE_WIDTH * columns; nuclear@43: nuclear@43: printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes); nuclear@43: nuclear@43: float *img = new float[4 * xsz * ysz]; nuclear@43: memset(img, 0, 4 * xsz * ysz * sizeof *img); nuclear@43: nuclear@43: for(int i=0; i