clray

annotate rt.cl @ 45:8047637961a2

fixed the issue of hitting maximum vertical image sizes for large kdtrees
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 29 Aug 2010 04:20:42 +0100
parents f9eec11e5acc
children 30bf84881553
rev   line source
nuclear@12 1 /* vim: set ft=opencl:ts=4:sw=4 */
nuclear@45 2 #include "common.h"
nuclear@12 3
nuclear@2 4 struct RendInfo {
nuclear@22 5 float4 ambient;
nuclear@2 6 int xsz, ysz;
nuclear@9 7 int num_faces, num_lights;
nuclear@2 8 int max_iter;
nuclear@28 9 int kd_depth;
nuclear@2 10 };
nuclear@2 11
nuclear@9 12 struct Vertex {
nuclear@2 13 float4 pos;
nuclear@9 14 float4 normal;
nuclear@12 15 float4 tex;
nuclear@12 16 float4 padding;
nuclear@9 17 };
nuclear@9 18
nuclear@9 19 struct Face {
nuclear@9 20 struct Vertex v[3];
nuclear@9 21 float4 normal;
nuclear@9 22 int matid;
nuclear@12 23 int padding[3];
nuclear@9 24 };
nuclear@9 25
nuclear@9 26 struct Material {
nuclear@5 27 float4 kd, ks;
nuclear@9 28 float kr, kt;
nuclear@9 29 float spow;
nuclear@12 30 float padding;
nuclear@2 31 };
nuclear@2 32
nuclear@3 33 struct Light {
nuclear@3 34 float4 pos, color;
nuclear@3 35 };
nuclear@3 36
nuclear@2 37 struct Ray {
nuclear@2 38 float4 origin, dir;
nuclear@2 39 };
nuclear@2 40
nuclear@2 41 struct SurfPoint {
nuclear@2 42 float t;
nuclear@12 43 float4 pos, norm, dbg;
nuclear@9 44 global const struct Face *obj;
nuclear@19 45 struct Material mat;
nuclear@2 46 };
nuclear@2 47
nuclear@16 48 struct Scene {
nuclear@16 49 float4 ambient;
nuclear@16 50 global const struct Face *faces;
nuclear@16 51 int num_faces;
nuclear@16 52 global const struct Light *lights;
nuclear@16 53 int num_lights;
nuclear@16 54 global const struct Material *matlib;
nuclear@43 55 //global const struct KDNode *kdtree;
nuclear@28 56 };
nuclear@28 57
nuclear@28 58 struct AABBox {
nuclear@28 59 float4 min, max;
nuclear@28 60 };
nuclear@28 61
nuclear@28 62 struct KDNode {
nuclear@29 63 struct AABBox aabb;
nuclear@43 64 int face_idx[MAX_NODE_FACES];
nuclear@28 65 int num_faces;
nuclear@35 66 int left, right;
nuclear@35 67 int padding;
nuclear@16 68 };
nuclear@2 69
nuclear@16 70 #define MIN_ENERGY 0.001
nuclear@21 71 #define EPSILON 1e-5
nuclear@16 72
nuclear@43 73 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg);
nuclear@43 74 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg);
nuclear@9 75 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
nuclear@28 76 bool intersect_aabb(struct Ray ray, struct AABBox aabb);
nuclear@16 77
nuclear@8 78 float4 reflect(float4 v, float4 n);
nuclear@8 79 float4 transform(float4 v, global const float *xform);
nuclear@16 80 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans);
nuclear@12 81 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
nuclear@19 82 float mean(float4 v);
nuclear@4 83
nuclear@43 84 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg);
nuclear@43 85
nuclear@39 86
nuclear@39 87 kernel void render(write_only image2d_t fb,
nuclear@4 88 global const struct RendInfo *rinf,
nuclear@9 89 global const struct Face *faces,
nuclear@9 90 global const struct Material *matlib,
nuclear@4 91 global const struct Light *lights,
nuclear@7 92 global const struct Ray *primrays,
nuclear@12 93 global const float *xform,
nuclear@28 94 global const float *invtrans,
nuclear@43 95 //global const struct KDNode *kdtree
nuclear@43 96 read_only image2d_t kdtree_img)
nuclear@2 97 {
nuclear@2 98 int idx = get_global_id(0);
nuclear@2 99
nuclear@16 100 struct Scene scn;
nuclear@16 101 scn.ambient = rinf->ambient;
nuclear@16 102 scn.faces = faces;
nuclear@16 103 scn.num_faces = rinf->num_faces;
nuclear@16 104 scn.lights = lights;
nuclear@16 105 scn.num_lights = rinf->num_lights;
nuclear@16 106 scn.matlib = matlib;
nuclear@43 107 //scn.kdtree_img = kdtree_img;
nuclear@8 108
nuclear@16 109 struct Ray ray = primrays[idx];
nuclear@16 110 transform_ray(&ray, xform, invtrans);
nuclear@4 111
nuclear@19 112 float4 pixel = (float4)(0, 0, 0, 0);
nuclear@22 113 float4 energy = (float4)(1.0, 1.0, 1.0, 0.0);
nuclear@19 114 int iter = 0;
nuclear@19 115
nuclear@19 116 while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) {
nuclear@19 117 struct SurfPoint sp;
nuclear@43 118 if(find_intersection(ray, &scn, &sp, kdtree_img)) {
nuclear@43 119 pixel += shade(ray, &scn, &sp, kdtree_img) * energy;
nuclear@19 120
nuclear@19 121 float4 refl_col = sp.mat.ks * sp.mat.kr;
nuclear@19 122
nuclear@19 123 ray.origin = sp.pos;
nuclear@19 124 ray.dir = reflect(-ray.dir, sp.norm);
nuclear@19 125
nuclear@35 126 energy *= refl_col;
nuclear@19 127 } else {
nuclear@43 128 energy = (float4)(0.0, 0.0, 0.0, 0.0);
nuclear@19 129 }
nuclear@17 130 }
nuclear@19 131
nuclear@39 132 int2 coord;
nuclear@43 133 coord.x = idx % rinf->xsz;
nuclear@43 134 coord.y = idx / rinf->xsz;
nuclear@39 135
nuclear@39 136 write_imagef(fb, coord, pixel);
nuclear@4 137 }
nuclear@4 138
nuclear@43 139 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg)
nuclear@16 140 {
nuclear@16 141 float4 norm = sp->norm;
nuclear@43 142 //bool entering = true;
nuclear@12 143
nuclear@12 144 if(dot(ray.dir, norm) >= 0.0) {
nuclear@12 145 norm = -norm;
nuclear@43 146 //entering = false;
nuclear@12 147 }
nuclear@12 148
nuclear@19 149 float4 dcol = scn->ambient * sp->mat.kd;
nuclear@8 150 float4 scol = (float4)(0, 0, 0, 0);
nuclear@5 151
nuclear@16 152 for(int i=0; i<scn->num_lights; i++) {
nuclear@16 153 float4 ldir = scn->lights[i].pos - sp->pos;
nuclear@5 154
nuclear@16 155 struct Ray shadowray;
nuclear@16 156 shadowray.origin = sp->pos;
nuclear@16 157 shadowray.dir = ldir;
nuclear@5 158
nuclear@43 159 if(!find_intersection(shadowray, scn, 0, kdimg)) {
nuclear@16 160 ldir = normalize(ldir);
nuclear@43 161 float4 vdir = -ray.dir;
nuclear@43 162 vdir.x = native_divide(vdir.x, RAY_MAG);
nuclear@43 163 vdir.y = native_divide(vdir.y, RAY_MAG);
nuclear@43 164 vdir.z = native_divide(vdir.z, RAY_MAG);
nuclear@16 165 float4 vref = reflect(vdir, norm);
nuclear@16 166
nuclear@16 167 float diff = fmax(dot(ldir, norm), 0.0f);
nuclear@43 168 dcol += sp->mat.kd /* scn->lights[i].color*/ * diff;
nuclear@16 169
nuclear@43 170 float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
nuclear@43 171 scol += sp->mat.ks /* scn->lights[i].color*/ * spec;
nuclear@16 172 }
nuclear@16 173 }
nuclear@16 174
nuclear@8 175 return dcol + scol;
nuclear@2 176 }
nuclear@2 177
nuclear@45 178 #define STACK_SIZE MAX_TREE_DEPTH
nuclear@43 179 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
nuclear@28 180 {
nuclear@29 181 struct SurfPoint sp0;
nuclear@29 182 sp0.t = 1.0;
nuclear@29 183 sp0.obj = 0;
nuclear@29 184
nuclear@29 185 int idxstack[STACK_SIZE];
nuclear@31 186 int top = 0; // points after the topmost element of the stack
nuclear@35 187 idxstack[top++] = 0; // root at tree[0]
nuclear@29 188
nuclear@31 189 while(top > 0) {
nuclear@31 190 int idx = idxstack[--top]; // remove this index from the stack and process it
nuclear@30 191
nuclear@43 192 struct KDNode node;
nuclear@43 193 read_kdnode(idx, &node, kdimg);
nuclear@29 194
nuclear@43 195 if(intersect_aabb(ray, node.aabb)) {
nuclear@43 196 if(node.left == -1) {
nuclear@31 197 // leaf node... check each face in turn and update the nearest intersection as needed
nuclear@43 198 for(int i=0; i<node.num_faces; i++) {
nuclear@31 199 struct SurfPoint spt;
nuclear@43 200 int fidx = node.face_idx[i];
nuclear@29 201
nuclear@31 202 if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) {
nuclear@31 203 sp0 = spt;
nuclear@29 204 }
nuclear@29 205 }
nuclear@31 206 } else {
nuclear@31 207 // internal node... recurse to the children
nuclear@43 208 idxstack[top++] = node.left;
nuclear@43 209 idxstack[top++] = node.right;
nuclear@29 210 }
nuclear@29 211 }
nuclear@29 212 }
nuclear@29 213
nuclear@29 214 if(!sp0.obj) {
nuclear@29 215 return false;
nuclear@29 216 }
nuclear@29 217
nuclear@29 218 if(spres) {
nuclear@29 219 *spres = sp0;
nuclear@29 220 spres->mat = scn->matlib[sp0.obj->matid];
nuclear@29 221 }
nuclear@29 222 return true;
nuclear@28 223 }
nuclear@16 224
nuclear@16 225 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
nuclear@2 226 {
nuclear@12 227 float4 origin = ray.origin;
nuclear@12 228 float4 dir = ray.dir;
nuclear@12 229 float4 norm = face->normal;
nuclear@12 230
nuclear@16 231 float ndotdir = dot(dir, norm);
nuclear@12 232
nuclear@9 233 if(fabs(ndotdir) <= EPSILON) {
nuclear@9 234 return false;
nuclear@9 235 }
nuclear@2 236
nuclear@9 237 float4 pt = face->v[0].pos;
nuclear@12 238 float4 vec = pt - origin;
nuclear@2 239
nuclear@16 240 float ndotvec = dot(norm, vec);
nuclear@43 241 float t = native_divide(ndotvec, ndotdir);
nuclear@2 242
nuclear@2 243 if(t < EPSILON || t > 1.0) {
nuclear@2 244 return false;
nuclear@2 245 }
nuclear@12 246 pt = origin + dir * t;
nuclear@9 247
nuclear@12 248
nuclear@12 249 float4 bc = calc_bary(pt, face, norm);
nuclear@9 250 float bc_sum = bc.x + bc.y + bc.z;
nuclear@9 251
nuclear@20 252 if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) {
nuclear@9 253 return false;
nuclear@12 254 bc *= 1.2;
nuclear@9 255 }
nuclear@2 256
nuclear@2 257 sp->t = t;
nuclear@9 258 sp->pos = pt;
nuclear@21 259 sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z);
nuclear@9 260 sp->obj = face;
nuclear@12 261 sp->dbg = bc;
nuclear@2 262 return true;
nuclear@2 263 }
nuclear@5 264
nuclear@28 265 bool intersect_aabb(struct Ray ray, struct AABBox aabb)
nuclear@28 266 {
nuclear@28 267 if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
nuclear@28 268 ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
nuclear@28 269 return true;
nuclear@28 270 }
nuclear@28 271
nuclear@29 272 float4 bbox[2] = {
nuclear@29 273 aabb.min.x, aabb.min.y, aabb.min.z, 0,
nuclear@29 274 aabb.max.x, aabb.max.y, aabb.max.z, 0
nuclear@29 275 };
nuclear@28 276
nuclear@28 277 int xsign = (int)(ray.dir.x < 0.0);
nuclear@43 278 float invdirx = native_recip(ray.dir.x);
nuclear@28 279 float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
nuclear@28 280 float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
nuclear@28 281
nuclear@28 282 int ysign = (int)(ray.dir.y < 0.0);
nuclear@43 283 float invdiry = native_recip(ray.dir.y);
nuclear@28 284 float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
nuclear@28 285 float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
nuclear@28 286
nuclear@28 287 if(tmin > tymax || tymin > tmax) {
nuclear@28 288 return false;
nuclear@28 289 }
nuclear@28 290
nuclear@28 291 if(tymin > tmin) tmin = tymin;
nuclear@28 292 if(tymax < tmax) tmax = tymax;
nuclear@28 293
nuclear@28 294 int zsign = (int)(ray.dir.z < 0.0);
nuclear@43 295 float invdirz = native_recip(ray.dir.z);
nuclear@28 296 float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
nuclear@28 297 float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
nuclear@28 298
nuclear@28 299 if(tmin > tzmax || tzmin > tmax) {
nuclear@28 300 return false;
nuclear@28 301 }
nuclear@28 302
nuclear@29 303 return tmin < 1.0 && tmax > 0.0;
nuclear@28 304 }
nuclear@28 305
nuclear@8 306 float4 reflect(float4 v, float4 n)
nuclear@5 307 {
nuclear@23 308 return 2.0f * dot(v, n) * n - v;
nuclear@5 309 }
nuclear@8 310
nuclear@8 311 float4 transform(float4 v, global const float *xform)
nuclear@8 312 {
nuclear@8 313 float4 res;
nuclear@8 314 res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
nuclear@8 315 res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
nuclear@8 316 res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
nuclear@12 317 res.w = 0.0;
nuclear@8 318 return res;
nuclear@8 319 }
nuclear@8 320
nuclear@16 321 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans)
nuclear@8 322 {
nuclear@16 323 ray->origin = transform(ray->origin, xform);
nuclear@16 324 ray->dir = transform(ray->dir, invtrans);
nuclear@8 325 }
nuclear@9 326
nuclear@12 327 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
nuclear@9 328 {
nuclear@12 329 float4 bc = (float4)(0, 0, 0, 0);
nuclear@9 330
nuclear@12 331 // calculate area of the whole triangle
nuclear@12 332 float4 v1 = face->v[1].pos - face->v[0].pos;
nuclear@12 333 float4 v2 = face->v[2].pos - face->v[0].pos;
nuclear@12 334 float4 xv1v2 = cross(v1, v2);
nuclear@12 335
nuclear@16 336 float area = fabs(dot(xv1v2, norm)) * 0.5;
nuclear@9 337 if(area < EPSILON) {
nuclear@9 338 return bc;
nuclear@9 339 }
nuclear@9 340
nuclear@9 341 float4 pv0 = face->v[0].pos - pt;
nuclear@9 342 float4 pv1 = face->v[1].pos - pt;
nuclear@9 343 float4 pv2 = face->v[2].pos - pt;
nuclear@9 344
nuclear@12 345 // calculate the area of each sub-triangle
nuclear@12 346 float4 x12 = cross(pv1, pv2);
nuclear@12 347 float4 x20 = cross(pv2, pv0);
nuclear@12 348 float4 x01 = cross(pv0, pv1);
nuclear@12 349
nuclear@16 350 float a0 = fabs(dot(x12, norm)) * 0.5;
nuclear@16 351 float a1 = fabs(dot(x20, norm)) * 0.5;
nuclear@16 352 float a2 = fabs(dot(x01, norm)) * 0.5;
nuclear@9 353
nuclear@43 354 bc.x = native_divide(a0, area);
nuclear@43 355 bc.y = native_divide(a1, area);
nuclear@43 356 bc.z = native_divide(a2, area);
nuclear@9 357 return bc;
nuclear@9 358 }
nuclear@19 359
nuclear@19 360 float mean(float4 v)
nuclear@19 361 {
nuclear@19 362 return native_divide(v.x + v.y + v.z, 3.0);
nuclear@19 363 }
nuclear@43 364
nuclear@43 365
nuclear@43 366 const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
nuclear@43 367
nuclear@43 368 // read a KD-tree node from a texture scanline
nuclear@43 369 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
nuclear@43 370 {
nuclear@45 371 int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT);
nuclear@45 372
nuclear@43 373 int2 tc;
nuclear@45 374 tc.x = startx;
nuclear@45 375 tc.y = idx % KDIMG_MAX_HEIGHT;
nuclear@43 376
nuclear@43 377 node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
nuclear@43 378 node->aabb.max = read_imagef(kdimg, kdsampler, tc);
nuclear@43 379
nuclear@45 380 tc.x = startx + 2 + MAX_NODE_FACES / 4;
nuclear@43 381 float4 pix = read_imagef(kdimg, kdsampler, tc);
nuclear@43 382 node->num_faces = (int)pix.x;
nuclear@43 383 node->left = (int)pix.y;
nuclear@43 384 node->right = (int)pix.z;
nuclear@43 385
nuclear@45 386 tc.x = startx + 2;
nuclear@43 387 for(int i=0; i<node->num_faces; i+=4) {
nuclear@43 388 float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
nuclear@43 389 node->face_idx[i] = (int)pix.x;
nuclear@43 390 node->face_idx[i + 1] = (int)pix.y;
nuclear@43 391 node->face_idx[i + 2] = (int)pix.z;
nuclear@43 392 node->face_idx[i + 3] = (int)pix.w;
nuclear@43 393 }
nuclear@43 394 }