clray
view rt.cl @ 45:8047637961a2
fixed the issue of hitting maximum vertical image sizes for large kdtrees
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 29 Aug 2010 04:20:42 +0100 |
parents | f9eec11e5acc |
children | 30bf84881553 |
line source
1 /* vim: set ft=opencl:ts=4:sw=4 */
2 #include "common.h"
4 struct RendInfo {
5 float4 ambient;
6 int xsz, ysz;
7 int num_faces, num_lights;
8 int max_iter;
9 int kd_depth;
10 };
12 struct Vertex {
13 float4 pos;
14 float4 normal;
15 float4 tex;
16 float4 padding;
17 };
19 struct Face {
20 struct Vertex v[3];
21 float4 normal;
22 int matid;
23 int padding[3];
24 };
26 struct Material {
27 float4 kd, ks;
28 float kr, kt;
29 float spow;
30 float padding;
31 };
33 struct Light {
34 float4 pos, color;
35 };
37 struct Ray {
38 float4 origin, dir;
39 };
41 struct SurfPoint {
42 float t;
43 float4 pos, norm, dbg;
44 global const struct Face *obj;
45 struct Material mat;
46 };
48 struct Scene {
49 float4 ambient;
50 global const struct Face *faces;
51 int num_faces;
52 global const struct Light *lights;
53 int num_lights;
54 global const struct Material *matlib;
55 //global const struct KDNode *kdtree;
56 };
58 struct AABBox {
59 float4 min, max;
60 };
62 struct KDNode {
63 struct AABBox aabb;
64 int face_idx[MAX_NODE_FACES];
65 int num_faces;
66 int left, right;
67 int padding;
68 };
70 #define MIN_ENERGY 0.001
71 #define EPSILON 1e-5
73 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg);
74 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg);
75 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
76 bool intersect_aabb(struct Ray ray, struct AABBox aabb);
78 float4 reflect(float4 v, float4 n);
79 float4 transform(float4 v, global const float *xform);
80 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans);
81 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
82 float mean(float4 v);
84 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg);
87 kernel void render(write_only image2d_t fb,
88 global const struct RendInfo *rinf,
89 global const struct Face *faces,
90 global const struct Material *matlib,
91 global const struct Light *lights,
92 global const struct Ray *primrays,
93 global const float *xform,
94 global const float *invtrans,
95 //global const struct KDNode *kdtree
96 read_only image2d_t kdtree_img)
97 {
98 int idx = get_global_id(0);
100 struct Scene scn;
101 scn.ambient = rinf->ambient;
102 scn.faces = faces;
103 scn.num_faces = rinf->num_faces;
104 scn.lights = lights;
105 scn.num_lights = rinf->num_lights;
106 scn.matlib = matlib;
107 //scn.kdtree_img = kdtree_img;
109 struct Ray ray = primrays[idx];
110 transform_ray(&ray, xform, invtrans);
112 float4 pixel = (float4)(0, 0, 0, 0);
113 float4 energy = (float4)(1.0, 1.0, 1.0, 0.0);
114 int iter = 0;
116 while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) {
117 struct SurfPoint sp;
118 if(find_intersection(ray, &scn, &sp, kdtree_img)) {
119 pixel += shade(ray, &scn, &sp, kdtree_img) * energy;
121 float4 refl_col = sp.mat.ks * sp.mat.kr;
123 ray.origin = sp.pos;
124 ray.dir = reflect(-ray.dir, sp.norm);
126 energy *= refl_col;
127 } else {
128 energy = (float4)(0.0, 0.0, 0.0, 0.0);
129 }
130 }
132 int2 coord;
133 coord.x = idx % rinf->xsz;
134 coord.y = idx / rinf->xsz;
136 write_imagef(fb, coord, pixel);
137 }
139 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg)
140 {
141 float4 norm = sp->norm;
142 //bool entering = true;
144 if(dot(ray.dir, norm) >= 0.0) {
145 norm = -norm;
146 //entering = false;
147 }
149 float4 dcol = scn->ambient * sp->mat.kd;
150 float4 scol = (float4)(0, 0, 0, 0);
152 for(int i=0; i<scn->num_lights; i++) {
153 float4 ldir = scn->lights[i].pos - sp->pos;
155 struct Ray shadowray;
156 shadowray.origin = sp->pos;
157 shadowray.dir = ldir;
159 if(!find_intersection(shadowray, scn, 0, kdimg)) {
160 ldir = normalize(ldir);
161 float4 vdir = -ray.dir;
162 vdir.x = native_divide(vdir.x, RAY_MAG);
163 vdir.y = native_divide(vdir.y, RAY_MAG);
164 vdir.z = native_divide(vdir.z, RAY_MAG);
165 float4 vref = reflect(vdir, norm);
167 float diff = fmax(dot(ldir, norm), 0.0f);
168 dcol += sp->mat.kd /* scn->lights[i].color*/ * diff;
170 float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
171 scol += sp->mat.ks /* scn->lights[i].color*/ * spec;
172 }
173 }
175 return dcol + scol;
176 }
178 #define STACK_SIZE MAX_TREE_DEPTH
179 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
180 {
181 struct SurfPoint sp0;
182 sp0.t = 1.0;
183 sp0.obj = 0;
185 int idxstack[STACK_SIZE];
186 int top = 0; // points after the topmost element of the stack
187 idxstack[top++] = 0; // root at tree[0]
189 while(top > 0) {
190 int idx = idxstack[--top]; // remove this index from the stack and process it
192 struct KDNode node;
193 read_kdnode(idx, &node, kdimg);
195 if(intersect_aabb(ray, node.aabb)) {
196 if(node.left == -1) {
197 // leaf node... check each face in turn and update the nearest intersection as needed
198 for(int i=0; i<node.num_faces; i++) {
199 struct SurfPoint spt;
200 int fidx = node.face_idx[i];
202 if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) {
203 sp0 = spt;
204 }
205 }
206 } else {
207 // internal node... recurse to the children
208 idxstack[top++] = node.left;
209 idxstack[top++] = node.right;
210 }
211 }
212 }
214 if(!sp0.obj) {
215 return false;
216 }
218 if(spres) {
219 *spres = sp0;
220 spres->mat = scn->matlib[sp0.obj->matid];
221 }
222 return true;
223 }
225 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
226 {
227 float4 origin = ray.origin;
228 float4 dir = ray.dir;
229 float4 norm = face->normal;
231 float ndotdir = dot(dir, norm);
233 if(fabs(ndotdir) <= EPSILON) {
234 return false;
235 }
237 float4 pt = face->v[0].pos;
238 float4 vec = pt - origin;
240 float ndotvec = dot(norm, vec);
241 float t = native_divide(ndotvec, ndotdir);
243 if(t < EPSILON || t > 1.0) {
244 return false;
245 }
246 pt = origin + dir * t;
249 float4 bc = calc_bary(pt, face, norm);
250 float bc_sum = bc.x + bc.y + bc.z;
252 if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) {
253 return false;
254 bc *= 1.2;
255 }
257 sp->t = t;
258 sp->pos = pt;
259 sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z);
260 sp->obj = face;
261 sp->dbg = bc;
262 return true;
263 }
265 bool intersect_aabb(struct Ray ray, struct AABBox aabb)
266 {
267 if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
268 ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
269 return true;
270 }
272 float4 bbox[2] = {
273 aabb.min.x, aabb.min.y, aabb.min.z, 0,
274 aabb.max.x, aabb.max.y, aabb.max.z, 0
275 };
277 int xsign = (int)(ray.dir.x < 0.0);
278 float invdirx = native_recip(ray.dir.x);
279 float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
280 float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
282 int ysign = (int)(ray.dir.y < 0.0);
283 float invdiry = native_recip(ray.dir.y);
284 float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
285 float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
287 if(tmin > tymax || tymin > tmax) {
288 return false;
289 }
291 if(tymin > tmin) tmin = tymin;
292 if(tymax < tmax) tmax = tymax;
294 int zsign = (int)(ray.dir.z < 0.0);
295 float invdirz = native_recip(ray.dir.z);
296 float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
297 float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
299 if(tmin > tzmax || tzmin > tmax) {
300 return false;
301 }
303 return tmin < 1.0 && tmax > 0.0;
304 }
306 float4 reflect(float4 v, float4 n)
307 {
308 return 2.0f * dot(v, n) * n - v;
309 }
311 float4 transform(float4 v, global const float *xform)
312 {
313 float4 res;
314 res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
315 res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
316 res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
317 res.w = 0.0;
318 return res;
319 }
321 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans)
322 {
323 ray->origin = transform(ray->origin, xform);
324 ray->dir = transform(ray->dir, invtrans);
325 }
327 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
328 {
329 float4 bc = (float4)(0, 0, 0, 0);
331 // calculate area of the whole triangle
332 float4 v1 = face->v[1].pos - face->v[0].pos;
333 float4 v2 = face->v[2].pos - face->v[0].pos;
334 float4 xv1v2 = cross(v1, v2);
336 float area = fabs(dot(xv1v2, norm)) * 0.5;
337 if(area < EPSILON) {
338 return bc;
339 }
341 float4 pv0 = face->v[0].pos - pt;
342 float4 pv1 = face->v[1].pos - pt;
343 float4 pv2 = face->v[2].pos - pt;
345 // calculate the area of each sub-triangle
346 float4 x12 = cross(pv1, pv2);
347 float4 x20 = cross(pv2, pv0);
348 float4 x01 = cross(pv0, pv1);
350 float a0 = fabs(dot(x12, norm)) * 0.5;
351 float a1 = fabs(dot(x20, norm)) * 0.5;
352 float a2 = fabs(dot(x01, norm)) * 0.5;
354 bc.x = native_divide(a0, area);
355 bc.y = native_divide(a1, area);
356 bc.z = native_divide(a2, area);
357 return bc;
358 }
360 float mean(float4 v)
361 {
362 return native_divide(v.x + v.y + v.z, 3.0);
363 }
366 const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
368 // read a KD-tree node from a texture scanline
369 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
370 {
371 int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT);
373 int2 tc;
374 tc.x = startx;
375 tc.y = idx % KDIMG_MAX_HEIGHT;
377 node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
378 node->aabb.max = read_imagef(kdimg, kdsampler, tc);
380 tc.x = startx + 2 + MAX_NODE_FACES / 4;
381 float4 pix = read_imagef(kdimg, kdsampler, tc);
382 node->num_faces = (int)pix.x;
383 node->left = (int)pix.y;
384 node->right = (int)pix.z;
386 tc.x = startx + 2;
387 for(int i=0; i<node->num_faces; i+=4) {
388 float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
389 node->face_idx[i] = (int)pix.x;
390 node->face_idx[i + 1] = (int)pix.y;
391 node->face_idx[i + 2] = (int)pix.z;
392 node->face_idx[i + 3] = (int)pix.w;
393 }
394 }