clray
diff src/rt.cc @ 43:f9eec11e5acc
shoehorned the kdtree into an OpenCL image and improved performance slightly
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 28 Aug 2010 09:38:49 +0100 |
parents | 057b8575a1c1 |
children | 8047637961a2 |
line diff
1.1 --- a/src/rt.cc Sat Aug 28 02:01:16 2010 +0100 1.2 +++ b/src/rt.cc Sat Aug 28 09:38:49 2010 +0100 1.3 @@ -39,6 +39,7 @@ 1.4 }; 1.5 1.6 static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg); 1.7 +static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret); 1.8 1.9 static Face *faces; 1.10 static Ray *prim_rays; 1.11 @@ -52,6 +53,9 @@ 1.12 1.13 static RendInfo rinf; 1.14 1.15 +static long timing_sample_sum; 1.16 +static long num_timing_samples; 1.17 + 1.18 1.19 bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex) 1.20 { 1.21 @@ -91,7 +95,9 @@ 1.22 fprintf(stderr, "failed to create kdtree buffer\n"); 1.23 return false; 1.24 } 1.25 - // XXX now we can actually destroy the original kdtree and keep only the GPU version 1.26 + 1.27 + int kdimg_xsz, kdimg_ysz; 1.28 + float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz); 1.29 1.30 /* setup argument buffers */ 1.31 #ifdef CLGL_INTEROP 1.32 @@ -106,7 +112,11 @@ 1.33 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays); 1.34 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float)); 1.35 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float)); 1.36 - prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf); 1.37 + //prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf); 1.38 + prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels); 1.39 + 1.40 + delete [] kdimg_pixels; 1.41 + 1.42 1.43 if(prog->get_num_args() < NUM_KERNEL_ARGS) { 1.44 return false; 1.45 @@ -125,6 +135,8 @@ 1.46 void destroy_renderer() 1.47 { 1.48 delete prog; 1.49 + 1.50 + printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples); 1.51 } 1.52 1.53 bool render() 1.54 @@ -172,7 +184,11 @@ 1.55 unmap_mem_buffer(mbuf); 1.56 #endif 1.57 1.58 - printf("rendered in %ld msec\n", get_msec() - tm0); 1.59 
+ long msec = get_msec() - tm0; 1.60 + timing_sample_sum += msec; 1.61 + num_timing_samples++; 1.62 + 1.63 + printf("rendered in %ld msec\n", msec); 1.64 return true; 1.65 } 1.66 1.67 @@ -273,10 +289,61 @@ 1.68 float py = 1.0 - ((float)y / (float)h) * ysz; 1.69 float pz = 1.0 / tan(0.5 * vfov); 1.70 1.71 - px *= 100.0; 1.72 - py *= 100.0; 1.73 - pz *= 100.0; 1.74 + float mag = sqrt(px * px + py * py + pz * pz); 1.75 + 1.76 + px = px * 500.0 / mag; 1.77 + py = py * 500.0 / mag; 1.78 + pz = pz * 500.0 / mag; 1.79 1.80 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}}; 1.81 return ray; 1.82 } 1.83 + 1.84 +static int next_pow2(int x) 1.85 +{ 1.86 + x--; 1.87 + x = (x >> 1) | x; 1.88 + x = (x >> 2) | x; 1.89 + x = (x >> 4) | x; 1.90 + x = (x >> 8) | x; 1.91 + x = (x >> 16) | x; 1.92 + return x + 1; 1.93 +} 1.94 + 1.95 +static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret) 1.96 +{ 1.97 + int xsz = 16; 1.98 + int ysz = next_pow2(num_nodes); 1.99 + 1.100 + printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes); 1.101 + 1.102 + float *img = new float[4 * xsz * ysz]; 1.103 + memset(img, 0, 4 * xsz * ysz * sizeof *img); 1.104 + 1.105 + for(int i=0; i<num_nodes; i++) { 1.106 + float *ptr = img + i * 4 * xsz; 1.107 + 1.108 + *ptr++ = kdtree[i].aabb.min[0]; 1.109 + *ptr++ = kdtree[i].aabb.min[1]; 1.110 + *ptr++ = kdtree[i].aabb.min[2]; 1.111 + *ptr++ = 0.0; 1.112 + 1.113 + *ptr++ = kdtree[i].aabb.max[0]; 1.114 + *ptr++ = kdtree[i].aabb.max[1]; 1.115 + *ptr++ = kdtree[i].aabb.max[2]; 1.116 + *ptr++ = 0.0; 1.117 + 1.118 + for(int j=0; j<MAX_NODE_FACES; j++) { 1.119 + *ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f; 1.120 + } 1.121 + 1.122 + *ptr++ = (float)kdtree[i].num_faces; 1.123 + *ptr++ = (float)kdtree[i].left; 1.124 + *ptr++ = (float)kdtree[i].right; 1.125 + *ptr++ = 0.0; 1.126 + } 1.127 + 1.128 + if(xsz_ret) *xsz_ret = xsz; 1.129 + if(ysz_ret) *ysz_ret = ysz; 1.130 + return img; 1.131 +}