clray

diff rt.cl @ 43:f9eec11e5acc

shoehorned the kdtree into an OpenCL image and improved performance slightly
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 28 Aug 2010 09:38:49 +0100
parents 1bcbb53b3505
children 8047637961a2
line diff
     1.1 --- a/rt.cl	Sat Aug 28 02:01:16 2010 +0100
     1.2 +++ b/rt.cl	Sat Aug 28 09:38:49 2010 +0100
     1.3 @@ -51,26 +51,28 @@
     1.4  	global const struct Light *lights;
     1.5  	int num_lights;
     1.6  	global const struct Material *matlib;
     1.7 -	global const struct KDNode *kdtree;
     1.8 +	//global const struct KDNode *kdtree;
     1.9  };
    1.10  
    1.11  struct AABBox {
    1.12  	float4 min, max;
    1.13  };
    1.14  
    1.15 +#define MAX_NODE_FACES	32
    1.16  struct KDNode {
    1.17  	struct AABBox aabb;
    1.18 -	int face_idx[32];
    1.19 +	int face_idx[MAX_NODE_FACES];
    1.20  	int num_faces;
    1.21  	int left, right;
    1.22  	int padding;
    1.23  };
    1.24  
    1.25 +#define RAY_MAG		500.0
    1.26  #define MIN_ENERGY	0.001
    1.27  #define EPSILON		1e-5
    1.28  
    1.29 -float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp);
    1.30 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp);
    1.31 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg);
    1.32 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg);
    1.33  bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
    1.34  bool intersect_aabb(struct Ray ray, struct AABBox aabb);
    1.35  
    1.36 @@ -80,6 +82,8 @@
    1.37  float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
    1.38  float mean(float4 v);
    1.39  
    1.40 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg);
    1.41 +
    1.42  
    1.43  kernel void render(write_only image2d_t fb,
    1.44  		global const struct RendInfo *rinf,
    1.45 @@ -89,7 +93,8 @@
    1.46  		global const struct Ray *primrays,
    1.47  		global const float *xform,
    1.48  		global const float *invtrans,
    1.49 -		global const struct KDNode *kdtree)
    1.50 +		//global const struct KDNode *kdtree
    1.51 +		read_only image2d_t kdtree_img)
    1.52  {
    1.53  	int idx = get_global_id(0);
    1.54  
    1.55 @@ -100,7 +105,7 @@
    1.56  	scn.lights = lights;
    1.57  	scn.num_lights = rinf->num_lights;
    1.58  	scn.matlib = matlib;
    1.59 -	scn.kdtree = kdtree;
    1.60 +	//scn.kdtree_img = kdtree_img;
    1.61  
    1.62  	struct Ray ray = primrays[idx];
    1.63  	transform_ray(&ray, xform, invtrans);
    1.64 @@ -111,8 +116,8 @@
    1.65  
    1.66  	while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) {
    1.67  		struct SurfPoint sp;
    1.68 -		if(find_intersection(ray, &scn, &sp)) {
    1.69 -			pixel += shade(ray, &scn, &sp) * energy;
    1.70 +		if(find_intersection(ray, &scn, &sp, kdtree_img)) {
    1.71 +			pixel += shade(ray, &scn, &sp, kdtree_img) * energy;
    1.72  
    1.73  			float4 refl_col = sp.mat.ks * sp.mat.kr;
    1.74  
    1.75 @@ -121,27 +126,25 @@
    1.76  
    1.77  			energy *= refl_col;
    1.78  		} else {
    1.79 -			break;
    1.80 +			energy = (float4)(0.0, 0.0, 0.0, 0.0);
    1.81  		}
    1.82  	}
    1.83  
    1.84 -	int img_x = get_image_width(fb);
    1.85 -
    1.86  	int2 coord;
    1.87 -	coord.x = idx % img_x;
    1.88 -	coord.y = idx / img_x;
    1.89 +	coord.x = idx % rinf->xsz;
    1.90 +	coord.y = idx / rinf->xsz;
    1.91  
    1.92  	write_imagef(fb, coord, pixel);
    1.93  }
    1.94  
    1.95 -float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp)
    1.96 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg)
    1.97  {
    1.98  	float4 norm = sp->norm;
    1.99 -	bool entering = true;
   1.100 +	//bool entering = true;
   1.101  
   1.102  	if(dot(ray.dir, norm) >= 0.0) {
   1.103  		norm = -norm;
   1.104 -		entering = false;
   1.105 +		//entering = false;
   1.106  	}
   1.107  
   1.108  	float4 dcol = scn->ambient * sp->mat.kd;
   1.109 @@ -154,16 +157,19 @@
   1.110  		shadowray.origin = sp->pos;
   1.111  		shadowray.dir = ldir;
   1.112  
   1.113 -		if(!find_intersection(shadowray, scn, 0)) {
   1.114 +		if(!find_intersection(shadowray, scn, 0, kdimg)) {
   1.115  			ldir = normalize(ldir);
   1.116 -			float4 vdir = -normalize(ray.dir);
   1.117 +			float4 vdir = -ray.dir;
   1.118 +			vdir.x = native_divide(vdir.x, RAY_MAG);
   1.119 +			vdir.y = native_divide(vdir.y, RAY_MAG);
   1.120 +			vdir.z = native_divide(vdir.z, RAY_MAG);
   1.121  			float4 vref = reflect(vdir, norm);
   1.122  
   1.123  			float diff = fmax(dot(ldir, norm), 0.0f);
   1.124 -			dcol += sp->mat.kd * scn->lights[i].color * diff;
   1.125 +			dcol += sp->mat.kd /* scn->lights[i].color*/ * diff;
   1.126  
   1.127 -			float spec = powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
   1.128 -			scol += sp->mat.ks * scn->lights[i].color * spec;
   1.129 +			float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
   1.130 +			scol += sp->mat.ks /* scn->lights[i].color*/ * spec;
   1.131  		}
   1.132  	}
   1.133  
   1.134 @@ -171,7 +177,7 @@
   1.135  }
   1.136  
   1.137  #define STACK_SIZE	64
   1.138 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
   1.139 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
   1.140  {
   1.141  	struct SurfPoint sp0;
   1.142  	sp0.t = 1.0;
   1.143 @@ -184,14 +190,15 @@
   1.144  	while(top > 0) {
   1.145  		int idx = idxstack[--top];	// remove this index from the stack and process it
   1.146  
   1.147 -		global const struct KDNode *node = scn->kdtree + idx;
   1.148 +		struct KDNode node;
   1.149 +		read_kdnode(idx, &node, kdimg);
   1.150  
   1.151 -		if(intersect_aabb(ray, node->aabb)) {
   1.152 -			if(node->left == -1) {
   1.153 +		if(intersect_aabb(ray, node.aabb)) {
   1.154 +			if(node.left == -1) {
   1.155  				// leaf node... check each face in turn and update the nearest intersection as needed
   1.156 -				for(int i=0; i<node->num_faces; i++) {
   1.157 +				for(int i=0; i<node.num_faces; i++) {
   1.158  					struct SurfPoint spt;
   1.159 -					int fidx = node->face_idx[i];
   1.160 +					int fidx = node.face_idx[i];
   1.161  
   1.162  					if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) {
   1.163  						sp0 = spt;
   1.164 @@ -199,8 +206,8 @@
   1.165  				}
   1.166  			} else {
   1.167  				// internal node... recurse to the children
   1.168 -				idxstack[top++] = node->left;
   1.169 -				idxstack[top++] = node->right;
   1.170 +				idxstack[top++] = node.left;
   1.171 +				idxstack[top++] = node.right;
   1.172  			}
   1.173  		}
   1.174  	}
   1.175 @@ -232,7 +239,7 @@
   1.176  	float4 vec = pt - origin;
   1.177  
   1.178  	float ndotvec = dot(norm, vec);
   1.179 -	float t = ndotvec / ndotdir;
   1.180 +	float t = native_divide(ndotvec, ndotdir);
   1.181  
   1.182  	if(t < EPSILON || t > 1.0) {
   1.183  		return false;
   1.184 @@ -269,12 +276,12 @@
   1.185  	};
   1.186  
   1.187  	int xsign = (int)(ray.dir.x < 0.0);
   1.188 -	float invdirx = 1.0 / ray.dir.x;
   1.189 +	float invdirx = native_recip(ray.dir.x);
   1.190  	float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
   1.191  	float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
   1.192  
   1.193  	int ysign = (int)(ray.dir.y < 0.0);
   1.194 -	float invdiry = 1.0 / ray.dir.y;
   1.195 +	float invdiry = native_recip(ray.dir.y);
   1.196  	float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
   1.197  	float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
   1.198  
   1.199 @@ -286,7 +293,7 @@
   1.200  	if(tymax < tmax) tmax = tymax;
   1.201  
   1.202  	int zsign = (int)(ray.dir.z < 0.0);
   1.203 -	float invdirz = 1.0 / ray.dir.z;
   1.204 +	float invdirz = native_recip(ray.dir.z);
   1.205  	float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
   1.206  	float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
   1.207  
   1.208 @@ -345,9 +352,9 @@
   1.209  	float a1 = fabs(dot(x20, norm)) * 0.5;
   1.210  	float a2 = fabs(dot(x01, norm)) * 0.5;
   1.211  
   1.212 -	bc.x = a0 / area;
   1.213 -	bc.y = a1 / area;
   1.214 -	bc.z = a2 / area;
   1.215 +	bc.x = native_divide(a0, area);
   1.216 +	bc.y = native_divide(a1, area);
   1.217 +	bc.z = native_divide(a2, area);
   1.218  	return bc;
   1.219  }
   1.220  
   1.221 @@ -355,3 +362,32 @@
   1.222  {
   1.223  	return native_divide(v.x + v.y + v.z, 3.0);
   1.224  }
   1.225 +
   1.226 +
   1.227 +const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
   1.228 +
   1.229 +// read a KD-tree node from a texture scanline
   1.230 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
   1.231 +{
   1.232 +	int2 tc;
   1.233 +	tc.x = 0;
   1.234 +	tc.y = idx;
   1.235 +
   1.236 +	node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
   1.237 +	node->aabb.max = read_imagef(kdimg, kdsampler, tc);
   1.238 +
   1.239 +	tc.x = 2 + MAX_NODE_FACES / 4;
   1.240 +	float4 pix = read_imagef(kdimg, kdsampler, tc);
   1.241 +	node->num_faces = (int)pix.x;
   1.242 +	node->left = (int)pix.y;
   1.243 +	node->right = (int)pix.z;
   1.244 +
   1.245 +	tc.x = 2;
   1.246 +	for(int i=0; i<node->num_faces; i+=4) {
   1.247 +		float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
   1.248 +		node->face_idx[i] = (int)pix.x;
   1.249 +		node->face_idx[i + 1] = (int)pix.y;
   1.250 +		node->face_idx[i + 2] = (int)pix.z;
   1.251 +		node->face_idx[i + 3] = (int)pix.w;
   1.252 +	}
   1.253 +}