rev |
line source |
nuclear@12
|
1 /* vim: set ft=opencl:ts=4:sw=4 */
|
nuclear@45
|
2 #include "common.h"
|
nuclear@12
|
3
|
nuclear@2
|
4 struct RendInfo {
|
nuclear@22
|
5 float4 ambient;
|
nuclear@2
|
6 int xsz, ysz;
|
nuclear@9
|
7 int num_faces, num_lights;
|
nuclear@2
|
8 int max_iter;
|
nuclear@47
|
9 int cast_shadows;
|
nuclear@2
|
10 };
|
nuclear@2
|
11
|
nuclear@9
|
12 struct Vertex {
|
nuclear@2
|
13 float4 pos;
|
nuclear@9
|
14 float4 normal;
|
nuclear@12
|
15 float4 tex;
|
nuclear@12
|
16 float4 padding;
|
nuclear@9
|
17 };
|
nuclear@9
|
18
|
nuclear@9
|
19 struct Face {
|
nuclear@9
|
20 struct Vertex v[3];
|
nuclear@9
|
21 float4 normal;
|
nuclear@9
|
22 int matid;
|
nuclear@12
|
23 int padding[3];
|
nuclear@9
|
24 };
|
nuclear@9
|
25
|
nuclear@9
|
26 struct Material {
|
nuclear@5
|
27 float4 kd, ks;
|
nuclear@9
|
28 float kr, kt;
|
nuclear@9
|
29 float spow;
|
nuclear@12
|
30 float padding;
|
nuclear@2
|
31 };
|
nuclear@2
|
32
|
nuclear@3
|
33 struct Light {
|
nuclear@3
|
34 float4 pos, color;
|
nuclear@3
|
35 };
|
nuclear@3
|
36
|
nuclear@2
|
37 struct Ray {
|
nuclear@2
|
38 float4 origin, dir;
|
nuclear@2
|
39 };
|
nuclear@2
|
40
|
nuclear@2
|
41 struct SurfPoint {
|
nuclear@2
|
42 float t;
|
nuclear@12
|
43 float4 pos, norm, dbg;
|
nuclear@9
|
44 global const struct Face *obj;
|
nuclear@19
|
45 struct Material mat;
|
nuclear@2
|
46 };
|
nuclear@2
|
47
|
nuclear@16
|
48 struct Scene {
|
nuclear@16
|
49 float4 ambient;
|
nuclear@16
|
50 global const struct Face *faces;
|
nuclear@16
|
51 int num_faces;
|
nuclear@16
|
52 global const struct Light *lights;
|
nuclear@16
|
53 int num_lights;
|
nuclear@16
|
54 global const struct Material *matlib;
|
nuclear@43
|
55 //global const struct KDNode *kdtree;
|
nuclear@47
|
56 bool cast_shadows;
|
nuclear@28
|
57 };
|
nuclear@28
|
58
|
nuclear@28
|
59 struct AABBox {
|
nuclear@28
|
60 float4 min, max;
|
nuclear@28
|
61 };
|
nuclear@28
|
62
|
nuclear@28
|
63 struct KDNode {
|
nuclear@29
|
64 struct AABBox aabb;
|
nuclear@43
|
65 int face_idx[MAX_NODE_FACES];
|
nuclear@28
|
66 int num_faces;
|
nuclear@35
|
67 int left, right;
|
nuclear@35
|
68 int padding;
|
nuclear@16
|
69 };
|
nuclear@2
|
70
|
nuclear@16
|
71 #define MIN_ENERGY 0.001
|
nuclear@21
|
72 #define EPSILON 1e-5
|
nuclear@16
|
73
|
nuclear@43
|
74 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg);
|
nuclear@43
|
75 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg);
|
nuclear@9
|
76 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
|
nuclear@28
|
77 bool intersect_aabb(struct Ray ray, struct AABBox aabb);
|
nuclear@16
|
78
|
nuclear@8
|
79 float4 reflect(float4 v, float4 n);
|
nuclear@8
|
80 float4 transform(float4 v, global const float *xform);
|
nuclear@16
|
81 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans);
|
nuclear@12
|
82 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
|
nuclear@19
|
83 float mean(float4 v);
|
nuclear@4
|
84
|
nuclear@43
|
85 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg);
|
nuclear@43
|
86
|
nuclear@39
|
87
|
nuclear@39
|
88 kernel void render(write_only image2d_t fb,
|
nuclear@4
|
89 global const struct RendInfo *rinf,
|
nuclear@9
|
90 global const struct Face *faces,
|
nuclear@9
|
91 global const struct Material *matlib,
|
nuclear@4
|
92 global const struct Light *lights,
|
nuclear@7
|
93 global const struct Ray *primrays,
|
nuclear@12
|
94 global const float *xform,
|
nuclear@28
|
95 global const float *invtrans,
|
nuclear@43
|
96 //global const struct KDNode *kdtree
|
nuclear@43
|
97 read_only image2d_t kdtree_img)
|
nuclear@2
|
98 {
|
nuclear@2
|
99 int idx = get_global_id(0);
|
nuclear@2
|
100
|
nuclear@16
|
101 struct Scene scn;
|
nuclear@16
|
102 scn.ambient = rinf->ambient;
|
nuclear@16
|
103 scn.faces = faces;
|
nuclear@16
|
104 scn.num_faces = rinf->num_faces;
|
nuclear@16
|
105 scn.lights = lights;
|
nuclear@16
|
106 scn.num_lights = rinf->num_lights;
|
nuclear@16
|
107 scn.matlib = matlib;
|
nuclear@47
|
108 scn.cast_shadows = rinf->cast_shadows;
|
nuclear@8
|
109
|
nuclear@16
|
110 struct Ray ray = primrays[idx];
|
nuclear@16
|
111 transform_ray(&ray, xform, invtrans);
|
nuclear@4
|
112
|
nuclear@19
|
113 float4 pixel = (float4)(0, 0, 0, 0);
|
nuclear@22
|
114 float4 energy = (float4)(1.0, 1.0, 1.0, 0.0);
|
nuclear@19
|
115 int iter = 0;
|
nuclear@19
|
116
|
nuclear@47
|
117 while(iter++ <= rinf->max_iter && mean(energy) > MIN_ENERGY) {
|
nuclear@19
|
118 struct SurfPoint sp;
|
nuclear@43
|
119 if(find_intersection(ray, &scn, &sp, kdtree_img)) {
|
nuclear@43
|
120 pixel += shade(ray, &scn, &sp, kdtree_img) * energy;
|
nuclear@19
|
121
|
nuclear@19
|
122 float4 refl_col = sp.mat.ks * sp.mat.kr;
|
nuclear@19
|
123
|
nuclear@19
|
124 ray.origin = sp.pos;
|
nuclear@19
|
125 ray.dir = reflect(-ray.dir, sp.norm);
|
nuclear@19
|
126
|
nuclear@35
|
127 energy *= refl_col;
|
nuclear@19
|
128 } else {
|
nuclear@43
|
129 energy = (float4)(0.0, 0.0, 0.0, 0.0);
|
nuclear@19
|
130 }
|
nuclear@17
|
131 }
|
nuclear@19
|
132
|
nuclear@39
|
133 int2 coord;
|
nuclear@43
|
134 coord.x = idx % rinf->xsz;
|
nuclear@43
|
135 coord.y = idx / rinf->xsz;
|
nuclear@39
|
136
|
nuclear@39
|
137 write_imagef(fb, coord, pixel);
|
nuclear@4
|
138 }
|
nuclear@4
|
139
|
nuclear@43
|
140 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg)
|
nuclear@16
|
141 {
|
nuclear@16
|
142 float4 norm = sp->norm;
|
nuclear@43
|
143 //bool entering = true;
|
nuclear@12
|
144
|
nuclear@12
|
145 if(dot(ray.dir, norm) >= 0.0) {
|
nuclear@12
|
146 norm = -norm;
|
nuclear@43
|
147 //entering = false;
|
nuclear@12
|
148 }
|
nuclear@12
|
149
|
nuclear@19
|
150 float4 dcol = scn->ambient * sp->mat.kd;
|
nuclear@8
|
151 float4 scol = (float4)(0, 0, 0, 0);
|
nuclear@5
|
152
|
nuclear@16
|
153 for(int i=0; i<scn->num_lights; i++) {
|
nuclear@16
|
154 float4 ldir = scn->lights[i].pos - sp->pos;
|
nuclear@5
|
155
|
nuclear@16
|
156 struct Ray shadowray;
|
nuclear@16
|
157 shadowray.origin = sp->pos;
|
nuclear@16
|
158 shadowray.dir = ldir;
|
nuclear@5
|
159
|
nuclear@47
|
160 if(!scn->cast_shadows || !find_intersection(shadowray, scn, 0, kdimg)) {
|
nuclear@16
|
161 ldir = normalize(ldir);
|
nuclear@43
|
162 float4 vdir = -ray.dir;
|
nuclear@43
|
163 vdir.x = native_divide(vdir.x, RAY_MAG);
|
nuclear@43
|
164 vdir.y = native_divide(vdir.y, RAY_MAG);
|
nuclear@43
|
165 vdir.z = native_divide(vdir.z, RAY_MAG);
|
nuclear@16
|
166 float4 vref = reflect(vdir, norm);
|
nuclear@16
|
167
|
nuclear@16
|
168 float diff = fmax(dot(ldir, norm), 0.0f);
|
nuclear@43
|
169 dcol += sp->mat.kd /* scn->lights[i].color*/ * diff;
|
nuclear@16
|
170
|
nuclear@43
|
171 float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
|
nuclear@43
|
172 scol += sp->mat.ks /* scn->lights[i].color*/ * spec;
|
nuclear@16
|
173 }
|
nuclear@16
|
174 }
|
nuclear@16
|
175
|
nuclear@8
|
176 return dcol + scol;
|
nuclear@2
|
177 }
|
nuclear@2
|
178
|
nuclear@45
|
179 #define STACK_SIZE MAX_TREE_DEPTH
|
nuclear@43
|
180 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
|
nuclear@28
|
181 {
|
nuclear@29
|
182 struct SurfPoint sp0;
|
nuclear@29
|
183 sp0.t = 1.0;
|
nuclear@29
|
184 sp0.obj = 0;
|
nuclear@29
|
185
|
nuclear@29
|
186 int idxstack[STACK_SIZE];
|
nuclear@31
|
187 int top = 0; // points after the topmost element of the stack
|
nuclear@35
|
188 idxstack[top++] = 0; // root at tree[0]
|
nuclear@29
|
189
|
nuclear@31
|
190 while(top > 0) {
|
nuclear@31
|
191 int idx = idxstack[--top]; // remove this index from the stack and process it
|
nuclear@30
|
192
|
nuclear@43
|
193 struct KDNode node;
|
nuclear@43
|
194 read_kdnode(idx, &node, kdimg);
|
nuclear@29
|
195
|
nuclear@43
|
196 if(intersect_aabb(ray, node.aabb)) {
|
nuclear@43
|
197 if(node.left == -1) {
|
nuclear@31
|
198 // leaf node... check each face in turn and update the nearest intersection as needed
|
nuclear@43
|
199 for(int i=0; i<node.num_faces; i++) {
|
nuclear@31
|
200 struct SurfPoint spt;
|
nuclear@43
|
201 int fidx = node.face_idx[i];
|
nuclear@29
|
202
|
nuclear@31
|
203 if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) {
|
nuclear@31
|
204 sp0 = spt;
|
nuclear@29
|
205 }
|
nuclear@29
|
206 }
|
nuclear@31
|
207 } else {
|
nuclear@31
|
208 // internal node... recurse to the children
|
nuclear@43
|
209 idxstack[top++] = node.left;
|
nuclear@43
|
210 idxstack[top++] = node.right;
|
nuclear@29
|
211 }
|
nuclear@29
|
212 }
|
nuclear@29
|
213 }
|
nuclear@29
|
214
|
nuclear@29
|
215 if(!sp0.obj) {
|
nuclear@29
|
216 return false;
|
nuclear@29
|
217 }
|
nuclear@29
|
218
|
nuclear@29
|
219 if(spres) {
|
nuclear@29
|
220 *spres = sp0;
|
nuclear@29
|
221 spres->mat = scn->matlib[sp0.obj->matid];
|
nuclear@29
|
222 }
|
nuclear@29
|
223 return true;
|
nuclear@28
|
224 }
|
nuclear@16
|
225
|
nuclear@16
|
226 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
|
nuclear@2
|
227 {
|
nuclear@12
|
228 float4 origin = ray.origin;
|
nuclear@12
|
229 float4 dir = ray.dir;
|
nuclear@12
|
230 float4 norm = face->normal;
|
nuclear@12
|
231
|
nuclear@16
|
232 float ndotdir = dot(dir, norm);
|
nuclear@12
|
233
|
nuclear@9
|
234 if(fabs(ndotdir) <= EPSILON) {
|
nuclear@9
|
235 return false;
|
nuclear@9
|
236 }
|
nuclear@2
|
237
|
nuclear@9
|
238 float4 pt = face->v[0].pos;
|
nuclear@12
|
239 float4 vec = pt - origin;
|
nuclear@2
|
240
|
nuclear@16
|
241 float ndotvec = dot(norm, vec);
|
nuclear@43
|
242 float t = native_divide(ndotvec, ndotdir);
|
nuclear@2
|
243
|
nuclear@2
|
244 if(t < EPSILON || t > 1.0) {
|
nuclear@2
|
245 return false;
|
nuclear@2
|
246 }
|
nuclear@12
|
247 pt = origin + dir * t;
|
nuclear@9
|
248
|
nuclear@12
|
249
|
nuclear@12
|
250 float4 bc = calc_bary(pt, face, norm);
|
nuclear@9
|
251 float bc_sum = bc.x + bc.y + bc.z;
|
nuclear@9
|
252
|
nuclear@20
|
253 if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) {
|
nuclear@9
|
254 return false;
|
nuclear@12
|
255 bc *= 1.2;
|
nuclear@9
|
256 }
|
nuclear@2
|
257
|
nuclear@2
|
258 sp->t = t;
|
nuclear@9
|
259 sp->pos = pt;
|
nuclear@21
|
260 sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z);
|
nuclear@9
|
261 sp->obj = face;
|
nuclear@12
|
262 sp->dbg = bc;
|
nuclear@2
|
263 return true;
|
nuclear@2
|
264 }
|
nuclear@5
|
265
|
nuclear@28
|
266 bool intersect_aabb(struct Ray ray, struct AABBox aabb)
|
nuclear@28
|
267 {
|
nuclear@28
|
268 if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
|
nuclear@28
|
269 ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
|
nuclear@28
|
270 return true;
|
nuclear@28
|
271 }
|
nuclear@28
|
272
|
nuclear@29
|
273 float4 bbox[2] = {
|
nuclear@29
|
274 aabb.min.x, aabb.min.y, aabb.min.z, 0,
|
nuclear@29
|
275 aabb.max.x, aabb.max.y, aabb.max.z, 0
|
nuclear@29
|
276 };
|
nuclear@28
|
277
|
nuclear@28
|
278 int xsign = (int)(ray.dir.x < 0.0);
|
nuclear@43
|
279 float invdirx = native_recip(ray.dir.x);
|
nuclear@28
|
280 float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
|
nuclear@28
|
281 float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
|
nuclear@28
|
282
|
nuclear@28
|
283 int ysign = (int)(ray.dir.y < 0.0);
|
nuclear@43
|
284 float invdiry = native_recip(ray.dir.y);
|
nuclear@28
|
285 float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
|
nuclear@28
|
286 float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
|
nuclear@28
|
287
|
nuclear@28
|
288 if(tmin > tymax || tymin > tmax) {
|
nuclear@28
|
289 return false;
|
nuclear@28
|
290 }
|
nuclear@28
|
291
|
nuclear@28
|
292 if(tymin > tmin) tmin = tymin;
|
nuclear@28
|
293 if(tymax < tmax) tmax = tymax;
|
nuclear@28
|
294
|
nuclear@28
|
295 int zsign = (int)(ray.dir.z < 0.0);
|
nuclear@43
|
296 float invdirz = native_recip(ray.dir.z);
|
nuclear@28
|
297 float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
|
nuclear@28
|
298 float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
|
nuclear@28
|
299
|
nuclear@28
|
300 if(tmin > tzmax || tzmin > tmax) {
|
nuclear@28
|
301 return false;
|
nuclear@28
|
302 }
|
nuclear@28
|
303
|
nuclear@29
|
304 return tmin < 1.0 && tmax > 0.0;
|
nuclear@28
|
305 }
|
nuclear@28
|
306
|
nuclear@8
|
307 float4 reflect(float4 v, float4 n)
|
nuclear@5
|
308 {
|
nuclear@23
|
309 return 2.0f * dot(v, n) * n - v;
|
nuclear@5
|
310 }
|
nuclear@8
|
311
|
nuclear@8
|
312 float4 transform(float4 v, global const float *xform)
|
nuclear@8
|
313 {
|
nuclear@8
|
314 float4 res;
|
nuclear@8
|
315 res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
|
nuclear@8
|
316 res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
|
nuclear@8
|
317 res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
|
nuclear@12
|
318 res.w = 0.0;
|
nuclear@8
|
319 return res;
|
nuclear@8
|
320 }
|
nuclear@8
|
321
|
nuclear@16
|
322 void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans)
|
nuclear@8
|
323 {
|
nuclear@16
|
324 ray->origin = transform(ray->origin, xform);
|
nuclear@16
|
325 ray->dir = transform(ray->dir, invtrans);
|
nuclear@8
|
326 }
|
nuclear@9
|
327
|
nuclear@12
|
328 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
|
nuclear@9
|
329 {
|
nuclear@12
|
330 float4 bc = (float4)(0, 0, 0, 0);
|
nuclear@9
|
331
|
nuclear@12
|
332 // calculate area of the whole triangle
|
nuclear@12
|
333 float4 v1 = face->v[1].pos - face->v[0].pos;
|
nuclear@12
|
334 float4 v2 = face->v[2].pos - face->v[0].pos;
|
nuclear@12
|
335 float4 xv1v2 = cross(v1, v2);
|
nuclear@12
|
336
|
nuclear@16
|
337 float area = fabs(dot(xv1v2, norm)) * 0.5;
|
nuclear@9
|
338 if(area < EPSILON) {
|
nuclear@9
|
339 return bc;
|
nuclear@9
|
340 }
|
nuclear@9
|
341
|
nuclear@9
|
342 float4 pv0 = face->v[0].pos - pt;
|
nuclear@9
|
343 float4 pv1 = face->v[1].pos - pt;
|
nuclear@9
|
344 float4 pv2 = face->v[2].pos - pt;
|
nuclear@9
|
345
|
nuclear@12
|
346 // calculate the area of each sub-triangle
|
nuclear@12
|
347 float4 x12 = cross(pv1, pv2);
|
nuclear@12
|
348 float4 x20 = cross(pv2, pv0);
|
nuclear@12
|
349 float4 x01 = cross(pv0, pv1);
|
nuclear@12
|
350
|
nuclear@16
|
351 float a0 = fabs(dot(x12, norm)) * 0.5;
|
nuclear@16
|
352 float a1 = fabs(dot(x20, norm)) * 0.5;
|
nuclear@16
|
353 float a2 = fabs(dot(x01, norm)) * 0.5;
|
nuclear@9
|
354
|
nuclear@43
|
355 bc.x = native_divide(a0, area);
|
nuclear@43
|
356 bc.y = native_divide(a1, area);
|
nuclear@43
|
357 bc.z = native_divide(a2, area);
|
nuclear@9
|
358 return bc;
|
nuclear@9
|
359 }
|
nuclear@19
|
360
|
nuclear@19
|
361 float mean(float4 v)
|
nuclear@19
|
362 {
|
nuclear@19
|
363 return native_divide(v.x + v.y + v.z, 3.0);
|
nuclear@19
|
364 }
|
nuclear@43
|
365
|
nuclear@43
|
366
|
nuclear@43
|
367 const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
|
nuclear@43
|
368
|
nuclear@43
|
369 // read a KD-tree node from a texture scanline
|
nuclear@43
|
370 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
|
nuclear@43
|
371 {
|
nuclear@45
|
372 int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT);
|
nuclear@45
|
373
|
nuclear@43
|
374 int2 tc;
|
nuclear@45
|
375 tc.x = startx;
|
nuclear@45
|
376 tc.y = idx % KDIMG_MAX_HEIGHT;
|
nuclear@43
|
377
|
nuclear@43
|
378 node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
|
nuclear@43
|
379 node->aabb.max = read_imagef(kdimg, kdsampler, tc);
|
nuclear@43
|
380
|
nuclear@45
|
381 tc.x = startx + 2 + MAX_NODE_FACES / 4;
|
nuclear@43
|
382 float4 pix = read_imagef(kdimg, kdsampler, tc);
|
nuclear@43
|
383 node->num_faces = (int)pix.x;
|
nuclear@43
|
384 node->left = (int)pix.y;
|
nuclear@43
|
385 node->right = (int)pix.z;
|
nuclear@43
|
386
|
nuclear@45
|
387 tc.x = startx + 2;
|
nuclear@43
|
388 for(int i=0; i<node->num_faces; i+=4) {
|
nuclear@43
|
389 float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
|
nuclear@43
|
390 node->face_idx[i] = (int)pix.x;
|
nuclear@43
|
391 node->face_idx[i + 1] = (int)pix.y;
|
nuclear@43
|
392 node->face_idx[i + 2] = (int)pix.z;
|
nuclear@43
|
393 node->face_idx[i + 3] = (int)pix.w;
|
nuclear@43
|
394 }
|
nuclear@43
|
395 }
|