clray
diff src/rt.cl @ 54:6a30f27fa1e6
separated the OpenGL visualization and added a CPU raytracing mode
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Fri, 10 Sep 2010 16:47:00 +0100 |
parents | rt.cl@30bf84881553 |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/rt.cl Fri Sep 10 16:47:00 2010 +0100 1.3 @@ -0,0 +1,393 @@ 1.4 +/* vim: set ft=opencl:ts=4:sw=4 */ 1.5 +#include "common.h" 1.6 + 1.7 +struct RendInfo { 1.8 + float4 ambient; 1.9 + int xsz, ysz; 1.10 + int num_faces, num_lights; 1.11 + int max_iter; 1.12 + int cast_shadows; 1.13 +}; 1.14 + 1.15 +struct Vertex { 1.16 + float4 pos; 1.17 + float4 normal; 1.18 + float4 tex; 1.19 + float4 padding; 1.20 +}; 1.21 + 1.22 +struct Face { 1.23 + struct Vertex v[3]; 1.24 + float4 normal; 1.25 + int matid; 1.26 + int padding[3]; 1.27 +}; 1.28 + 1.29 +struct Material { 1.30 + float4 kd, ks; 1.31 + float kr, kt; 1.32 + float spow; 1.33 + float padding; 1.34 +}; 1.35 + 1.36 +struct Light { 1.37 + float4 pos, color; 1.38 +}; 1.39 + 1.40 +struct Ray { 1.41 + float4 origin, dir; 1.42 +}; 1.43 + 1.44 +struct SurfPoint { 1.45 + float t; 1.46 + float4 pos, norm, dbg; 1.47 + global const struct Face *obj; 1.48 + struct Material mat; 1.49 +}; 1.50 + 1.51 +struct Scene { 1.52 + float4 ambient; 1.53 + global const struct Face *faces; 1.54 + int num_faces; 1.55 + global const struct Light *lights; 1.56 + int num_lights; 1.57 + global const struct Material *matlib; 1.58 + //global const struct KDNode *kdtree; 1.59 + bool cast_shadows; 1.60 +}; 1.61 + 1.62 +struct AABBox { 1.63 + float4 min, max; 1.64 +}; 1.65 + 1.66 +struct KDNode { 1.67 + struct AABBox aabb; 1.68 + int face_idx[MAX_NODE_FACES]; 1.69 + int num_faces; 1.70 + int left, right; 1.71 + int padding; 1.72 +}; 1.73 + 1.74 +#define MIN_ENERGY 0.001 1.75 + 1.76 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg); 1.77 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg); 1.78 +bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp); 1.79 +bool intersect_aabb(struct Ray ray, struct AABBox aabb); 1.80 + 1.81 +float4 reflect(float4 v, float4 n); 1.82 +float4 transform(float4 v, global const float *xform); 1.83 +void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans); 1.84 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm); 1.85 +float mean(float4 v); 1.86 + 1.87 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg); 1.88 + 1.89 + 1.90 +kernel void render(write_only image2d_t fb, 1.91 + global const struct RendInfo *rinf, 1.92 + global const struct Face *faces, 1.93 + global const struct Material *matlib, 1.94 + global const struct Light *lights, 1.95 + global const struct Ray *primrays, 1.96 + global const float *xform, 1.97 + global const float *invtrans, 1.98 + //global const struct KDNode *kdtree 1.99 + read_only image2d_t kdtree_img) 1.100 +{ 1.101 + int idx = get_global_id(0); 1.102 + 1.103 + struct Scene scn; 1.104 + scn.ambient = rinf->ambient; 1.105 + scn.faces = faces; 1.106 + scn.num_faces = rinf->num_faces; 1.107 + scn.lights = lights; 1.108 + scn.num_lights = rinf->num_lights; 1.109 + scn.matlib = matlib; 1.110 + scn.cast_shadows = rinf->cast_shadows; 1.111 + 1.112 + struct Ray ray = primrays[idx]; 1.113 + transform_ray(&ray, xform, invtrans); 1.114 + 1.115 + float4 pixel = (float4)(0, 0, 0, 0); 1.116 + float4 energy = (float4)(1.0, 1.0, 1.0, 0.0); 1.117 + int iter = 0; 1.118 + 1.119 + while(iter++ <= rinf->max_iter && mean(energy) > MIN_ENERGY) { 1.120 + struct SurfPoint sp; 1.121 + if(find_intersection(ray, &scn, &sp, kdtree_img)) { 1.122 + pixel += shade(ray, &scn, &sp, kdtree_img) * energy; 1.123 + 1.124 + float4 refl_col = sp.mat.ks * sp.mat.kr; 1.125 + 1.126 + ray.origin = sp.pos; 1.127 + ray.dir = reflect(-ray.dir, sp.norm); 1.128 + 1.129 + energy *= refl_col; 1.130 + } else { 1.131 + energy = (float4)(0.0, 0.0, 0.0, 0.0); 1.132 + } 1.133 + } 1.134 + 1.135 + int2 coord; 1.136 + coord.x = idx % rinf->xsz; 1.137 + coord.y = idx / rinf->xsz; 1.138 + 1.139 + write_imagef(fb, coord, pixel); 1.140 +} 1.141 + 1.142 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg) 1.143 +{ 1.144 + float4 norm = sp->norm; 1.145 + //bool entering = true; 1.146 + 1.147 + if(dot(ray.dir, norm) >= 0.0) { 1.148 + norm = -norm; 1.149 + //entering = false; 1.150 + } 1.151 + 1.152 + float4 dcol = scn->ambient * sp->mat.kd; 1.153 + float4 scol = (float4)(0, 0, 0, 0); 1.154 + 1.155 + for(int i=0; i<scn->num_lights; i++) { 1.156 + float4 ldir = scn->lights[i].pos - sp->pos; 1.157 + 1.158 + struct Ray shadowray; 1.159 + shadowray.origin = sp->pos; 1.160 + shadowray.dir = ldir; 1.161 + 1.162 + if(!scn->cast_shadows || !find_intersection(shadowray, scn, 0, kdimg)) { 1.163 + ldir = normalize(ldir); 1.164 + float4 vdir = -ray.dir; 1.165 + vdir.x = native_divide(vdir.x, RAY_MAG); 1.166 + vdir.y = native_divide(vdir.y, RAY_MAG); 1.167 + vdir.z = native_divide(vdir.z, RAY_MAG); 1.168 + float4 vref = reflect(vdir, norm); 1.169 + 1.170 + float diff = fmax(dot(ldir, norm), 0.0f); 1.171 + dcol += sp->mat.kd /* scn->lights[i].color*/ * diff; 1.172 + 1.173 + float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow); 1.174 + scol += sp->mat.ks /* scn->lights[i].color*/ * spec; 1.175 + } 1.176 + } 1.177 + 1.178 + return dcol + scol; 1.179 +} 1.180 + 1.181 +#define STACK_SIZE MAX_TREE_DEPTH 1.182 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg) 1.183 +{ 1.184 + struct SurfPoint sp0; 1.185 + sp0.t = 1.0; 1.186 + sp0.obj = 0; 1.187 + 1.188 + int idxstack[STACK_SIZE]; 1.189 + int top = 0; // points after the topmost element of the stack 1.190 + idxstack[top++] = 0; // root at tree[0] 1.191 + 1.192 + while(top > 0) { 1.193 + int idx = idxstack[--top]; // remove this index from the stack and process it 1.194 + 1.195 + struct KDNode node; 1.196 + read_kdnode(idx, &node, kdimg); 1.197 + 1.198 + if(intersect_aabb(ray, node.aabb)) { 1.199 + if(node.left == -1) { 1.200 + // leaf node... check each face in turn and update the nearest intersection as needed 1.201 + for(int i=0; i<node.num_faces; i++) { 1.202 + struct SurfPoint spt; 1.203 + int fidx = node.face_idx[i]; 1.204 + 1.205 + if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) { 1.206 + sp0 = spt; 1.207 + } 1.208 + } 1.209 + } else { 1.210 + // internal node... recurse to the children 1.211 + idxstack[top++] = node.left; 1.212 + idxstack[top++] = node.right; 1.213 + } 1.214 + } 1.215 + } 1.216 + 1.217 + if(!sp0.obj) { 1.218 + return false; 1.219 + } 1.220 + 1.221 + if(spres) { 1.222 + *spres = sp0; 1.223 + spres->mat = scn->matlib[sp0.obj->matid]; 1.224 + } 1.225 + return true; 1.226 +} 1.227 + 1.228 +bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp) 1.229 +{ 1.230 + float4 origin = ray.origin; 1.231 + float4 dir = ray.dir; 1.232 + float4 norm = face->normal; 1.233 + 1.234 + float ndotdir = dot(dir, norm); 1.235 + 1.236 + if(fabs(ndotdir) <= EPSILON) { 1.237 + return false; 1.238 + } 1.239 + 1.240 + float4 pt = face->v[0].pos; 1.241 + float4 vec = pt - origin; 1.242 + 1.243 + float ndotvec = dot(norm, vec); 1.244 + float t = native_divide(ndotvec, ndotdir); 1.245 + 1.246 + if(t < EPSILON || t > 1.0) { 1.247 + return false; 1.248 + } 1.249 + pt = origin + dir * t; 1.250 + 1.251 + 1.252 + float4 bc = calc_bary(pt, face, norm); 1.253 + float bc_sum = bc.x + bc.y + bc.z; 1.254 + 1.255 + if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) { 1.256 + return false; 1.257 + } 1.258 + 1.259 + sp->t = t; 1.260 + sp->pos = pt; 1.261 + sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z); 1.262 + sp->obj = face; 1.263 + sp->dbg = bc; 1.264 + return true; 1.265 +} 1.266 + 1.267 +bool intersect_aabb(struct Ray ray, struct AABBox aabb) 1.268 +{ 1.269 + if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z && 1.270 + ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) { 1.271 + return true; 1.272 + } 1.273 + 1.274 + float4 bbox[2] = { 1.275 + aabb.min.x, aabb.min.y, aabb.min.z, 0, 1.276 + aabb.max.x, aabb.max.y, aabb.max.z, 0 1.277 + }; 1.278 + 1.279 + int xsign = (int)(ray.dir.x < 0.0); 1.280 + float invdirx = native_recip(ray.dir.x); 1.281 + float tmin = (bbox[xsign].x - ray.origin.x) * invdirx; 1.282 + float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx; 1.283 + 1.284 + int ysign = (int)(ray.dir.y < 0.0); 1.285 + float invdiry = native_recip(ray.dir.y); 1.286 + float tymin = (bbox[ysign].y - ray.origin.y) * invdiry; 1.287 + float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry; 1.288 + 1.289 + if(tmin > tymax || tymin > tmax) { 1.290 + return false; 1.291 + } 1.292 + 1.293 + if(tymin > tmin) tmin = tymin; 1.294 + if(tymax < tmax) tmax = tymax; 1.295 + 1.296 + int zsign = (int)(ray.dir.z < 0.0); 1.297 + float invdirz = native_recip(ray.dir.z); 1.298 + float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz; 1.299 + float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz; 1.300 + 1.301 + if(tmin > tzmax || tzmin > tmax) { 1.302 + return false; 1.303 + } 1.304 + 1.305 + return tmin < 1.0 && tmax > 0.0; 1.306 +} 1.307 + 1.308 +float4 reflect(float4 v, float4 n) 1.309 +{ 1.310 + return 2.0f * dot(v, n) * n - v; 1.311 +} 1.312 + 1.313 +float4 transform(float4 v, global const float *xform) 1.314 +{ 1.315 + float4 res; 1.316 + res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12]; 1.317 + res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13]; 1.318 + res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14]; 1.319 + res.w = 0.0; 1.320 + return res; 1.321 +} 1.322 + 1.323 +void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans) 1.324 +{ 1.325 + ray->origin = transform(ray->origin, xform); 1.326 + ray->dir = transform(ray->dir, invtrans); 1.327 +} 1.328 + 1.329 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm) 1.330 +{ 1.331 + float4 bc = (float4)(0, 0, 0, 0); 1.332 + 1.333 + // calculate area of the whole triangle 1.334 + float4 v1 = face->v[1].pos - face->v[0].pos; 1.335 + float4 v2 = face->v[2].pos - face->v[0].pos; 1.336 + float4 xv1v2 = cross(v1, v2); 1.337 + 1.338 + float area = fabs(dot(xv1v2, norm)) * 0.5; 1.339 + if(area < EPSILON) { 1.340 + return bc; 1.341 + } 1.342 + 1.343 + float4 pv0 = face->v[0].pos - pt; 1.344 + float4 pv1 = face->v[1].pos - pt; 1.345 + float4 pv2 = face->v[2].pos - pt; 1.346 + 1.347 + // calculate the area of each sub-triangle 1.348 + float4 x12 = cross(pv1, pv2); 1.349 + float4 x20 = cross(pv2, pv0); 1.350 + float4 x01 = cross(pv0, pv1); 1.351 + 1.352 + float a0 = fabs(dot(x12, norm)) * 0.5; 1.353 + float a1 = fabs(dot(x20, norm)) * 0.5; 1.354 + float a2 = fabs(dot(x01, norm)) * 0.5; 1.355 + 1.356 + bc.x = native_divide(a0, area); 1.357 + bc.y = native_divide(a1, area); 1.358 + bc.z = native_divide(a2, area); 1.359 + return bc; 1.360 +} 1.361 + 1.362 +float mean(float4 v) 1.363 +{ 1.364 + return native_divide(v.x + v.y + v.z, 3.0); 1.365 +} 1.366 + 1.367 + 1.368 +const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; 1.369 + 1.370 +// read a KD-tree node from a texture scanline 1.371 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg) 1.372 +{ 1.373 + int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT); 1.374 + 1.375 + int2 tc; 1.376 + tc.x = startx; 1.377 + tc.y = idx % KDIMG_MAX_HEIGHT; 1.378 + 1.379 + node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++; 1.380 + node->aabb.max = read_imagef(kdimg, kdsampler, tc); 1.381 + 1.382 + tc.x = startx + 2 + MAX_NODE_FACES / 4; 1.383 + float4 pix = read_imagef(kdimg, kdsampler, tc); 1.384 + node->num_faces = (int)pix.x; 1.385 + node->left = (int)pix.y; 1.386 + node->right = (int)pix.z; 1.387 + 1.388 + tc.x = startx + 2; 1.389 + for(int i=0; i<node->num_faces; i+=4) { 1.390 + float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++; 1.391 + node->face_idx[i] = (int)pix.x; 1.392 + node->face_idx[i + 1] = (int)pix.y; 1.393 + node->face_idx[i + 2] = (int)pix.z; 1.394 + node->face_idx[i + 3] = (int)pix.w; 1.395 + } 1.396 +}