clray

diff src/rt.cl @ 54:6a30f27fa1e6

separated the OpenGL visualization and added a CPU raytracing mode
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 10 Sep 2010 16:47:00 +0100
parents rt.cl@30bf84881553
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/rt.cl	Fri Sep 10 16:47:00 2010 +0100
     1.3 @@ -0,0 +1,393 @@
     1.4 +/* vim: set ft=opencl:ts=4:sw=4 */
     1.5 +#include "common.h"
     1.6 +
     1.7 +struct RendInfo {
     1.8 +	float4 ambient;
     1.9 +	int xsz, ysz;
    1.10 +	int num_faces, num_lights;
    1.11 +	int max_iter;
    1.12 +	int cast_shadows;
    1.13 +};
    1.14 +
    1.15 +struct Vertex {
    1.16 +	float4 pos;
    1.17 +	float4 normal;
    1.18 +	float4 tex;
    1.19 +	float4 padding;
    1.20 +};
    1.21 +
    1.22 +struct Face {
    1.23 +	struct Vertex v[3];
    1.24 +	float4 normal;
    1.25 +	int matid;
    1.26 +	int padding[3];
    1.27 +};
    1.28 +
    1.29 +struct Material {
    1.30 +	float4 kd, ks;
    1.31 +	float kr, kt;
    1.32 +	float spow;
    1.33 +	float padding;
    1.34 +};
    1.35 +
    1.36 +struct Light {
    1.37 +	float4 pos, color;
    1.38 +};
    1.39 +
    1.40 +struct Ray {
    1.41 +	float4 origin, dir;
    1.42 +};
    1.43 +
    1.44 +struct SurfPoint {
    1.45 +	float t;
    1.46 +	float4 pos, norm, dbg;
    1.47 +	global const struct Face *obj;
    1.48 +	struct Material mat;
    1.49 +};
    1.50 +
    1.51 +struct Scene {
    1.52 +	float4 ambient;
    1.53 +	global const struct Face *faces;
    1.54 +	int num_faces;
    1.55 +	global const struct Light *lights;
    1.56 +	int num_lights;
    1.57 +	global const struct Material *matlib;
    1.58 +	//global const struct KDNode *kdtree;
    1.59 +	bool cast_shadows;
    1.60 +};
    1.61 +
    1.62 +struct AABBox {
    1.63 +	float4 min, max;
    1.64 +};
    1.65 +
    1.66 +struct KDNode {
    1.67 +	struct AABBox aabb;
    1.68 +	int face_idx[MAX_NODE_FACES];
    1.69 +	int num_faces;
    1.70 +	int left, right;
    1.71 +	int padding;
    1.72 +};
    1.73 +
    1.74 +#define MIN_ENERGY	0.001
    1.75 +
    1.76 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg);
    1.77 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg);
    1.78 +bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
    1.79 +bool intersect_aabb(struct Ray ray, struct AABBox aabb);
    1.80 +
    1.81 +float4 reflect(float4 v, float4 n);
    1.82 +float4 transform(float4 v, global const float *xform);
    1.83 +void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans);
    1.84 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
    1.85 +float mean(float4 v);
    1.86 +
    1.87 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg);
    1.88 +
    1.89 +
    1.90 +kernel void render(write_only image2d_t fb,
    1.91 +		global const struct RendInfo *rinf,
    1.92 +		global const struct Face *faces,
    1.93 +		global const struct Material *matlib,
    1.94 +		global const struct Light *lights,
    1.95 +		global const struct Ray *primrays,
    1.96 +		global const float *xform,
    1.97 +		global const float *invtrans,
    1.98 +		//global const struct KDNode *kdtree
    1.99 +		read_only image2d_t kdtree_img)
   1.100 +{
   1.101 +	int idx = get_global_id(0);
   1.102 +
   1.103 +	struct Scene scn;
   1.104 +	scn.ambient = rinf->ambient;
   1.105 +	scn.faces = faces;
   1.106 +	scn.num_faces = rinf->num_faces;
   1.107 +	scn.lights = lights;
   1.108 +	scn.num_lights = rinf->num_lights;
   1.109 +	scn.matlib = matlib;
   1.110 +	scn.cast_shadows = rinf->cast_shadows;
   1.111 +
   1.112 +	struct Ray ray = primrays[idx];
   1.113 +	transform_ray(&ray, xform, invtrans);
   1.114 +
   1.115 +	float4 pixel = (float4)(0, 0, 0, 0);
   1.116 +	float4 energy = (float4)(1.0, 1.0, 1.0, 0.0);
   1.117 +	int iter = 0;
   1.118 +
   1.119 +	while(iter++ <= rinf->max_iter && mean(energy) > MIN_ENERGY) {
   1.120 +		struct SurfPoint sp;
   1.121 +		if(find_intersection(ray, &scn, &sp, kdtree_img)) {
   1.122 +			pixel += shade(ray, &scn, &sp, kdtree_img) * energy;
   1.123 +
   1.124 +			float4 refl_col = sp.mat.ks * sp.mat.kr;
   1.125 +
   1.126 +			ray.origin = sp.pos;
   1.127 +			ray.dir = reflect(-ray.dir, sp.norm);
   1.128 +
   1.129 +			energy *= refl_col;
   1.130 +		} else {
   1.131 +			energy = (float4)(0.0, 0.0, 0.0, 0.0);
   1.132 +		}
   1.133 +	}
   1.134 +
   1.135 +	int2 coord;
   1.136 +	coord.x = idx % rinf->xsz;
   1.137 +	coord.y = idx / rinf->xsz;
   1.138 +
   1.139 +	write_imagef(fb, coord, pixel);
   1.140 +}
   1.141 +
   1.142 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg)
   1.143 +{
   1.144 +	float4 norm = sp->norm;
   1.145 +	//bool entering = true;
   1.146 +
   1.147 +	if(dot(ray.dir, norm) >= 0.0) {
   1.148 +		norm = -norm;
   1.149 +		//entering = false;
   1.150 +	}
   1.151 +
   1.152 +	float4 dcol = scn->ambient * sp->mat.kd;
   1.153 +	float4 scol = (float4)(0, 0, 0, 0);
   1.154 +
   1.155 +	for(int i=0; i<scn->num_lights; i++) {
   1.156 +		float4 ldir = scn->lights[i].pos - sp->pos;
   1.157 +
   1.158 +		struct Ray shadowray;
   1.159 +		shadowray.origin = sp->pos;
   1.160 +		shadowray.dir = ldir;
   1.161 +
   1.162 +		if(!scn->cast_shadows || !find_intersection(shadowray, scn, 0, kdimg)) {
   1.163 +			ldir = normalize(ldir);
   1.164 +			float4 vdir = -ray.dir;
   1.165 +			vdir.x = native_divide(vdir.x, RAY_MAG);
   1.166 +			vdir.y = native_divide(vdir.y, RAY_MAG);
   1.167 +			vdir.z = native_divide(vdir.z, RAY_MAG);
   1.168 +			float4 vref = reflect(vdir, norm);
   1.169 +
   1.170 +			float diff = fmax(dot(ldir, norm), 0.0f);
   1.171 +			dcol += sp->mat.kd /* scn->lights[i].color*/ * diff;
   1.172 +
   1.173 +			float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
   1.174 +			scol += sp->mat.ks /* scn->lights[i].color*/ * spec;
   1.175 +		}
   1.176 +	}
   1.177 +
   1.178 +	return dcol + scol;
   1.179 +}
   1.180 +
   1.181 +#define STACK_SIZE	MAX_TREE_DEPTH
   1.182 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
   1.183 +{
   1.184 +	struct SurfPoint sp0;
   1.185 +	sp0.t = 1.0;
   1.186 +	sp0.obj = 0;
   1.187 +
   1.188 +	int idxstack[STACK_SIZE];
   1.189 +	int top = 0;			// points after the topmost element of the stack
   1.190 +	idxstack[top++] = 0;	// root at tree[0]
   1.191 +
   1.192 +	while(top > 0) {
   1.193 +		int idx = idxstack[--top];	// remove this index from the stack and process it
   1.194 +
   1.195 +		struct KDNode node;
   1.196 +		read_kdnode(idx, &node, kdimg);
   1.197 +
   1.198 +		if(intersect_aabb(ray, node.aabb)) {
   1.199 +			if(node.left == -1) {
   1.200 +				// leaf node... check each face in turn and update the nearest intersection as needed
   1.201 +				for(int i=0; i<node.num_faces; i++) {
   1.202 +					struct SurfPoint spt;
   1.203 +					int fidx = node.face_idx[i];
   1.204 +
   1.205 +					if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) {
   1.206 +						sp0 = spt;
   1.207 +					}
   1.208 +				}
   1.209 +			} else {
   1.210 +				// internal node... recurse to the children
   1.211 +				idxstack[top++] = node.left;
   1.212 +				idxstack[top++] = node.right;
   1.213 +			}
   1.214 +		}
   1.215 +	}
   1.216 +
   1.217 +	if(!sp0.obj) {
   1.218 +		return false;
   1.219 +	}
   1.220 +
   1.221 +	if(spres) {
   1.222 +		*spres = sp0;
   1.223 +		spres->mat = scn->matlib[sp0.obj->matid];
   1.224 +	}
   1.225 +	return true;
   1.226 +}
   1.227 +
   1.228 +bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
   1.229 +{
   1.230 +	float4 origin = ray.origin;
   1.231 +	float4 dir = ray.dir;
   1.232 +	float4 norm = face->normal;
   1.233 +
   1.234 +	float ndotdir = dot(dir, norm);
   1.235 +
   1.236 +	if(fabs(ndotdir) <= EPSILON) {
   1.237 +		return false;
   1.238 +	}
   1.239 +
   1.240 +	float4 pt = face->v[0].pos;
   1.241 +	float4 vec = pt - origin;
   1.242 +
   1.243 +	float ndotvec = dot(norm, vec);
   1.244 +	float t = native_divide(ndotvec, ndotdir);
   1.245 +
   1.246 +	if(t < EPSILON || t > 1.0) {
   1.247 +		return false;
   1.248 +	}
   1.249 +	pt = origin + dir * t;
   1.250 +
   1.251 +
   1.252 +	float4 bc = calc_bary(pt, face, norm);
   1.253 +	float bc_sum = bc.x + bc.y + bc.z;
   1.254 +
   1.255 +	if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) {
   1.256 +		return false;
   1.257 +	}
   1.258 +
   1.259 +	sp->t = t;
   1.260 +	sp->pos = pt;
   1.261 +	sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z);
   1.262 +	sp->obj = face;
   1.263 +	sp->dbg = bc;
   1.264 +	return true;
   1.265 +}
   1.266 +
   1.267 +bool intersect_aabb(struct Ray ray, struct AABBox aabb)
   1.268 +{
   1.269 +	if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
   1.270 +			ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
   1.271 +		return true;
   1.272 +	}
   1.273 +
   1.274 +	float4 bbox[2] = {
   1.275 +		aabb.min.x, aabb.min.y, aabb.min.z, 0,
   1.276 +		aabb.max.x, aabb.max.y, aabb.max.z, 0
   1.277 +	};
   1.278 +
   1.279 +	int xsign = (int)(ray.dir.x < 0.0);
   1.280 +	float invdirx = native_recip(ray.dir.x);
   1.281 +	float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
   1.282 +	float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
   1.283 +
   1.284 +	int ysign = (int)(ray.dir.y < 0.0);
   1.285 +	float invdiry = native_recip(ray.dir.y);
   1.286 +	float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
   1.287 +	float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
   1.288 +
   1.289 +	if(tmin > tymax || tymin > tmax) {
   1.290 +		return false;
   1.291 +	}
   1.292 +
   1.293 +	if(tymin > tmin) tmin = tymin;
   1.294 +	if(tymax < tmax) tmax = tymax;
   1.295 +
   1.296 +	int zsign = (int)(ray.dir.z < 0.0);
   1.297 +	float invdirz = native_recip(ray.dir.z);
   1.298 +	float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
   1.299 +	float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
   1.300 +
   1.301 +	if(tmin > tzmax || tzmin > tmax) {
   1.302 +		return false;
   1.303 +	}
   1.304 +
   1.305 +	return tmin < 1.0 && tmax > 0.0;
   1.306 +}
   1.307 +
   1.308 +float4 reflect(float4 v, float4 n)
   1.309 +{
   1.310 +	return 2.0f * dot(v, n) * n - v;
   1.311 +}
   1.312 +
   1.313 +float4 transform(float4 v, global const float *xform)
   1.314 +{
   1.315 +	float4 res;
   1.316 +	res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
   1.317 +	res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
   1.318 +	res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
   1.319 +	res.w = 0.0;
   1.320 +	return res;
   1.321 +}
   1.322 +
   1.323 +void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans)
   1.324 +{
   1.325 +	ray->origin = transform(ray->origin, xform);
   1.326 +	ray->dir = transform(ray->dir, invtrans);
   1.327 +}
   1.328 +
   1.329 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
   1.330 +{
   1.331 +	float4 bc = (float4)(0, 0, 0, 0);
   1.332 +
   1.333 +	// calculate area of the whole triangle
   1.334 +	float4 v1 = face->v[1].pos - face->v[0].pos;
   1.335 +	float4 v2 = face->v[2].pos - face->v[0].pos;
   1.336 +	float4 xv1v2 = cross(v1, v2);
   1.337 +
   1.338 +	float area = fabs(dot(xv1v2, norm)) * 0.5;
   1.339 +	if(area < EPSILON) {
   1.340 +		return bc;
   1.341 +	}
   1.342 +
   1.343 +	float4 pv0 = face->v[0].pos - pt;
   1.344 +	float4 pv1 = face->v[1].pos - pt;
   1.345 +	float4 pv2 = face->v[2].pos - pt;
   1.346 +
   1.347 +	// calculate the area of each sub-triangle
   1.348 +	float4 x12 = cross(pv1, pv2);
   1.349 +	float4 x20 = cross(pv2, pv0);
   1.350 +	float4 x01 = cross(pv0, pv1);
   1.351 +
   1.352 +	float a0 = fabs(dot(x12, norm)) * 0.5;
   1.353 +	float a1 = fabs(dot(x20, norm)) * 0.5;
   1.354 +	float a2 = fabs(dot(x01, norm)) * 0.5;
   1.355 +
   1.356 +	bc.x = native_divide(a0, area);
   1.357 +	bc.y = native_divide(a1, area);
   1.358 +	bc.z = native_divide(a2, area);
   1.359 +	return bc;
   1.360 +}
   1.361 +
   1.362 +float mean(float4 v)
   1.363 +{
   1.364 +	return native_divide(v.x + v.y + v.z, 3.0);
   1.365 +}
   1.366 +
   1.367 +
   1.368 +const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
   1.369 +
   1.370 +// read a KD-tree node from a texture scanline
   1.371 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
   1.372 +{
   1.373 +	int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT);
   1.374 +
   1.375 +	int2 tc;
   1.376 +	tc.x = startx;
   1.377 +	tc.y = idx % KDIMG_MAX_HEIGHT;
   1.378 +
   1.379 +	node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
   1.380 +	node->aabb.max = read_imagef(kdimg, kdsampler, tc);
   1.381 +
   1.382 +	tc.x = startx + 2 + MAX_NODE_FACES / 4;
   1.383 +	float4 pix = read_imagef(kdimg, kdsampler, tc);
   1.384 +	node->num_faces = (int)pix.x;
   1.385 +	node->left = (int)pix.y;
   1.386 +	node->right = (int)pix.z;
   1.387 +
   1.388 +	tc.x = startx + 2;
   1.389 +	for(int i=0; i<node->num_faces; i+=4) {
   1.390 +		float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
   1.391 +		node->face_idx[i] = (int)pix.x;
   1.392 +		node->face_idx[i + 1] = (int)pix.y;
   1.393 +		node->face_idx[i + 2] = (int)pix.z;
   1.394 +		node->face_idx[i + 3] = (int)pix.w;
   1.395 +	}
   1.396 +}