clray

diff src/rt.cc @ 43:f9eec11e5acc

shoehorned the kdtree into an OpenCL image and improved performance slightly
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 28 Aug 2010 09:38:49 +0100
parents 057b8575a1c1
children 8047637961a2
line diff
     1.1 --- a/src/rt.cc	Sat Aug 28 02:01:16 2010 +0100
     1.2 +++ b/src/rt.cc	Sat Aug 28 09:38:49 2010 +0100
     1.3 @@ -39,6 +39,7 @@
     1.4  };
     1.5  
     1.6  static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
     1.7 +static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret);
     1.8  
     1.9  static Face *faces;
    1.10  static Ray *prim_rays;
    1.11 @@ -52,6 +53,9 @@
    1.12  
    1.13  static RendInfo rinf;
    1.14  
    1.15 +static long timing_sample_sum;
    1.16 +static long num_timing_samples;
    1.17 +
    1.18  
    1.19  bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex)
    1.20  {
    1.21 @@ -91,7 +95,9 @@
    1.22  		fprintf(stderr, "failed to create kdtree buffer\n");
    1.23  		return false;
    1.24  	}
    1.25 -	// XXX now we can actually destroy the original kdtree and keep only the GPU version
    1.26 +
    1.27 +	int kdimg_xsz, kdimg_ysz;
    1.28 +	float *kdimg_pixels = create_kdimage(kdbuf, scn->get_num_kdnodes(), &kdimg_xsz, &kdimg_ysz);
    1.29  
    1.30  	/* setup argument buffers */
    1.31  #ifdef CLGL_INTEROP
    1.32 @@ -106,7 +112,11 @@
    1.33  	prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
    1.34  	prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
    1.35  	prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
    1.36 -	prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
    1.37 +	//prog->set_arg_buffer(KARG_KDTREE, ARG_RD, scn->get_num_kdnodes() * sizeof *kdbuf, kdbuf);
    1.38 +	prog->set_arg_image(KARG_KDTREE, ARG_RD, kdimg_xsz, kdimg_ysz, kdimg_pixels);
    1.39 +
    1.40 +	delete [] kdimg_pixels;
    1.41 +
    1.42  
    1.43  	if(prog->get_num_args() < NUM_KERNEL_ARGS) {
    1.44  		return false;
    1.45 @@ -125,6 +135,8 @@
    1.46  void destroy_renderer()  // Deletes prog, then prints the mean of the render timings accumulated so far.
    1.47  {
    1.48  	delete prog;
    1.49 +
    1.50 +	printf("rendertime mean: %ld msec\n", timing_sample_sum / num_timing_samples);  // NOTE(review): integer (truncating) mean; this divides by zero when num_timing_samples is still 0, i.e. no timing sample was ever recorded -- needs a guard
    1.51  }
    1.52  
    1.53  bool render()
    1.54 @@ -172,7 +184,11 @@
    1.55  	unmap_mem_buffer(mbuf);
    1.56  #endif
    1.57  
    1.58 -	printf("rendered in %ld msec\n", get_msec() - tm0);
    1.59 +	long msec = get_msec() - tm0;
    1.60 +	timing_sample_sum += msec;
    1.61 +	num_timing_samples++;
    1.62 +
    1.63 +	printf("rendered in %ld msec\n", msec);
    1.64  	return true;
    1.65  }
    1.66  
    1.67 @@ -273,10 +289,61 @@
    1.68  	float py = 1.0 - ((float)y / (float)h) * ysz;
    1.69  	float pz = 1.0 / tan(0.5 * vfov);
    1.70  
    1.71 -	px *= 100.0;
    1.72 -	py *= 100.0;
    1.73 -	pz *= 100.0;
    1.74 +	float mag = sqrt(px * px + py * py + pz * pz);
    1.75 +
    1.76 +	px = px * 500.0 / mag;
    1.77 +	py = py * 500.0 / mag;
    1.78 +	pz = pz * 500.0 / mag;
    1.79  
    1.80  	Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
    1.81  	return ray;
    1.82  }
    1.83 +
    1.84 +static int next_pow2(int x)  // Rounds x up to a power of two; an x that already is a power of two is returned unchanged.
    1.85 +{
    1.86 +	x--;  // step down first so an exact power of two maps back to itself
    1.87 +	x = (x >> 1) | x;  // this and the following steps copy the highest set bit into every lower bit position
    1.88 +	x = (x >> 2) | x;
    1.89 +	x = (x >> 4) | x;
    1.90 +	x = (x >> 8) | x;
    1.91 +	x = (x >> 16) | x;  // shift widths 1+2+4+8+16 cover 32 bits -- assumes int is no wider than that
    1.92 +	return x + 1;  // all ones below the top bit, plus one, is the result; NOTE(review): x <= 0 is not handled meaningfully, and x + 1 overflows for x above 2^30 on a 32-bit int
    1.93 +}
    1.94 +
    1.95 +static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)  // Packs num_nodes kd-tree nodes into a newly allocated float buffer, one node per image row; returns the buffer (allocated with new[], so the caller releases it with delete[]) and reports the image size through xsz_ret/ysz_ret when they are non-null.
    1.96 +{
    1.97 +	int xsz = 16;  // row width in texels; each texel is 4 floats, so one row (one node) has room for 64 floats
    1.98 +	int ysz = next_pow2(num_nodes);  // one row per node, row count rounded up to a power of two
    1.99 +
   1.100 +	printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
   1.101 +
   1.102 +	float *img = new float[4 * xsz * ysz];
   1.103 +	memset(img, 0, 4 * xsz * ysz * sizeof *img);  // zero-fill so the rows past num_nodes and the unused tail of each row read as 0
   1.104 +
   1.105 +	for(int i=0; i<num_nodes; i++) {
   1.106 +		float *ptr = img + i * 4 * xsz;  // start of row i
   1.107 +
   1.108 +		*ptr++ = kdtree[i].aabb.min[0];  // texel 0: bounding box minimum (x, y, z) plus one padding float
   1.109 +		*ptr++ = kdtree[i].aabb.min[1];
   1.110 +		*ptr++ = kdtree[i].aabb.min[2];
   1.111 +		*ptr++ = 0.0;
   1.112 +
   1.113 +		*ptr++ = kdtree[i].aabb.max[0];  // texel 1: bounding box maximum (x, y, z) plus one padding float
   1.114 +		*ptr++ = kdtree[i].aabb.max[1];
   1.115 +		*ptr++ = kdtree[i].aabb.max[2];
   1.116 +		*ptr++ = 0.0;
   1.117 +
   1.118 +		for(int j=0; j<MAX_NODE_FACES; j++) {  // always writes MAX_NODE_FACES floats; NOTE(review): the row overflows into the next one if 8 + MAX_NODE_FACES + 4 exceeds 64, i.e. MAX_NODE_FACES > 52 -- the constant is defined outside this view, confirm
   1.119 +			*ptr++ = j < kdtree[i].num_faces ? (float)kdtree[i].face_idx[j] : 0.0f;  // face index stored as a float; slots beyond num_faces are written as 0
   1.120 +		}
   1.121 +
   1.122 +		*ptr++ = (float)kdtree[i].num_faces;  // final group of four: face count, left, right (converted to float), plus one padding float
   1.123 +		*ptr++ = (float)kdtree[i].left;
   1.124 +		*ptr++ = (float)kdtree[i].right;
   1.125 +		*ptr++ = 0.0;
   1.126 +	}
   1.127 +
   1.128 +	if(xsz_ret) *xsz_ret = xsz;  // both size outputs are optional
   1.129 +	if(ysz_ret) *ysz_ret = ysz;
   1.130 +	return img;
   1.131 +}