clray
diff rt.cl @ 43:f9eec11e5acc
shoehorned the kdtree into an OpenCL image and improved performance slightly
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 28 Aug 2010 09:38:49 +0100 |
parents | 1bcbb53b3505 |
children | 8047637961a2 |
line diff
1.1 --- a/rt.cl Sat Aug 28 02:01:16 2010 +0100 1.2 +++ b/rt.cl Sat Aug 28 09:38:49 2010 +0100 1.3 @@ -51,26 +51,28 @@ 1.4 global const struct Light *lights; 1.5 int num_lights; 1.6 global const struct Material *matlib; 1.7 - global const struct KDNode *kdtree; 1.8 + //global const struct KDNode *kdtree; 1.9 }; 1.10 1.11 struct AABBox { 1.12 float4 min, max; 1.13 }; 1.14 1.15 +#define MAX_NODE_FACES 32 1.16 struct KDNode { 1.17 struct AABBox aabb; 1.18 - int face_idx[32]; 1.19 + int face_idx[MAX_NODE_FACES]; 1.20 int num_faces; 1.21 int left, right; 1.22 int padding; 1.23 }; 1.24 1.25 +#define RAY_MAG 500.0 1.26 #define MIN_ENERGY 0.001 1.27 #define EPSILON 1e-5 1.28 1.29 -float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp); 1.30 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp); 1.31 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg); 1.32 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg); 1.33 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp); 1.34 bool intersect_aabb(struct Ray ray, struct AABBox aabb); 1.35 1.36 @@ -80,6 +82,8 @@ 1.37 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm); 1.38 float mean(float4 v); 1.39 1.40 +void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg); 1.41 + 1.42 1.43 kernel void render(write_only image2d_t fb, 1.44 global const struct RendInfo *rinf, 1.45 @@ -89,7 +93,8 @@ 1.46 global const struct Ray *primrays, 1.47 global const float *xform, 1.48 global const float *invtrans, 1.49 - global const struct KDNode *kdtree) 1.50 + //global const struct KDNode *kdtree 1.51 + read_only image2d_t kdtree_img) 1.52 { 1.53 int idx = get_global_id(0); 1.54 1.55 @@ -100,7 +105,7 @@ 1.56 scn.lights = lights; 1.57 scn.num_lights = rinf->num_lights; 1.58 scn.matlib = matlib; 1.59 - scn.kdtree = 
kdtree; 1.60 + //scn.kdtree_img = kdtree_img; 1.61 1.62 struct Ray ray = primrays[idx]; 1.63 transform_ray(&ray, xform, invtrans); 1.64 @@ -111,8 +116,8 @@ 1.65 1.66 while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) { 1.67 struct SurfPoint sp; 1.68 - if(find_intersection(ray, &scn, &sp)) { 1.69 - pixel += shade(ray, &scn, &sp) * energy; 1.70 + if(find_intersection(ray, &scn, &sp, kdtree_img)) { 1.71 + pixel += shade(ray, &scn, &sp, kdtree_img) * energy; 1.72 1.73 float4 refl_col = sp.mat.ks * sp.mat.kr; 1.74 1.75 @@ -121,27 +126,25 @@ 1.76 1.77 energy *= refl_col; 1.78 } else { 1.79 - break; 1.80 + energy = (float4)(0.0, 0.0, 0.0, 0.0); 1.81 } 1.82 } 1.83 1.84 - int img_x = get_image_width(fb); 1.85 - 1.86 int2 coord; 1.87 - coord.x = idx % img_x; 1.88 - coord.y = idx / img_x; 1.89 + coord.x = idx % rinf->xsz; 1.90 + coord.y = idx / rinf->xsz; 1.91 1.92 write_imagef(fb, coord, pixel); 1.93 } 1.94 1.95 -float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp) 1.96 +float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg) 1.97 { 1.98 float4 norm = sp->norm; 1.99 - bool entering = true; 1.100 + //bool entering = true; 1.101 1.102 if(dot(ray.dir, norm) >= 0.0) { 1.103 norm = -norm; 1.104 - entering = false; 1.105 + //entering = false; 1.106 } 1.107 1.108 float4 dcol = scn->ambient * sp->mat.kd; 1.109 @@ -154,16 +157,19 @@ 1.110 shadowray.origin = sp->pos; 1.111 shadowray.dir = ldir; 1.112 1.113 - if(!find_intersection(shadowray, scn, 0)) { 1.114 + if(!find_intersection(shadowray, scn, 0, kdimg)) { 1.115 ldir = normalize(ldir); 1.116 - float4 vdir = -normalize(ray.dir); 1.117 + float4 vdir = -ray.dir; 1.118 + vdir.x = native_divide(vdir.x, RAY_MAG); 1.119 + vdir.y = native_divide(vdir.y, RAY_MAG); 1.120 + vdir.z = native_divide(vdir.z, RAY_MAG); 1.121 float4 vref = reflect(vdir, norm); 1.122 1.123 float diff = fmax(dot(ldir, norm), 0.0f); 1.124 - dcol += sp->mat.kd * scn->lights[i].color * diff; 
1.125 + dcol += sp->mat.kd /* scn->lights[i].color*/ * diff; 1.126 1.127 - float spec = powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow); 1.128 - scol += sp->mat.ks * scn->lights[i].color * spec; 1.129 + float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow); 1.130 + scol += sp->mat.ks /* scn->lights[i].color*/ * spec; 1.131 } 1.132 } 1.133 1.134 @@ -171,7 +177,7 @@ 1.135 } 1.136 1.137 #define STACK_SIZE 64 1.138 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) 1.139 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg) 1.140 { 1.141 struct SurfPoint sp0; 1.142 sp0.t = 1.0; 1.143 @@ -184,14 +190,15 @@ 1.144 while(top > 0) { 1.145 int idx = idxstack[--top]; // remove this index from the stack and process it 1.146 1.147 - global const struct KDNode *node = scn->kdtree + idx; 1.148 + struct KDNode node; 1.149 + read_kdnode(idx, &node, kdimg); 1.150 1.151 - if(intersect_aabb(ray, node->aabb)) { 1.152 - if(node->left == -1) { 1.153 + if(intersect_aabb(ray, node.aabb)) { 1.154 + if(node.left == -1) { 1.155 // leaf node... check each face in turn and update the nearest intersection as needed 1.156 - for(int i=0; i<node->num_faces; i++) { 1.157 + for(int i=0; i<node.num_faces; i++) { 1.158 struct SurfPoint spt; 1.159 - int fidx = node->face_idx[i]; 1.160 + int fidx = node.face_idx[i]; 1.161 1.162 if(intersect(ray, scn->faces + fidx, &spt) && spt.t < sp0.t) { 1.163 sp0 = spt; 1.164 @@ -199,8 +206,8 @@ 1.165 } 1.166 } else { 1.167 // internal node... 
recurse to the children 1.168 - idxstack[top++] = node->left; 1.169 - idxstack[top++] = node->right; 1.170 + idxstack[top++] = node.left; 1.171 + idxstack[top++] = node.right; 1.172 } 1.173 } 1.174 } 1.175 @@ -232,7 +239,7 @@ 1.176 float4 vec = pt - origin; 1.177 1.178 float ndotvec = dot(norm, vec); 1.179 - float t = ndotvec / ndotdir; 1.180 + float t = native_divide(ndotvec, ndotdir); 1.181 1.182 if(t < EPSILON || t > 1.0) { 1.183 return false; 1.184 @@ -269,12 +276,12 @@ 1.185 }; 1.186 1.187 int xsign = (int)(ray.dir.x < 0.0); 1.188 - float invdirx = 1.0 / ray.dir.x; 1.189 + float invdirx = native_recip(ray.dir.x); 1.190 float tmin = (bbox[xsign].x - ray.origin.x) * invdirx; 1.191 float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx; 1.192 1.193 int ysign = (int)(ray.dir.y < 0.0); 1.194 - float invdiry = 1.0 / ray.dir.y; 1.195 + float invdiry = native_recip(ray.dir.y); 1.196 float tymin = (bbox[ysign].y - ray.origin.y) * invdiry; 1.197 float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry; 1.198 1.199 @@ -286,7 +293,7 @@ 1.200 if(tymax < tmax) tmax = tymax; 1.201 1.202 int zsign = (int)(ray.dir.z < 0.0); 1.203 - float invdirz = 1.0 / ray.dir.z; 1.204 + float invdirz = native_recip(ray.dir.z); 1.205 float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz; 1.206 float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz; 1.207 1.208 @@ -345,9 +352,9 @@ 1.209 float a1 = fabs(dot(x20, norm)) * 0.5; 1.210 float a2 = fabs(dot(x01, norm)) * 0.5; 1.211 1.212 - bc.x = a0 / area; 1.213 - bc.y = a1 / area; 1.214 - bc.z = a2 / area; 1.215 + bc.x = native_divide(a0, area); 1.216 + bc.y = native_divide(a1, area); 1.217 + bc.z = native_divide(a2, area); 1.218 return bc; 1.219 } 1.220 1.221 @@ -355,3 +362,32 @@ 1.222 { 1.223 return native_divide(v.x + v.y + v.z, 3.0); 1.224 } 1.225 + 1.226 + 1.227 +const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; 1.228 + 1.229 +// read a KD-tree node from a texture scanline 1.230 +void 
read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg) 1.231 +{ 1.232 + int2 tc; 1.233 + tc.x = 0; 1.234 + tc.y = idx; 1.235 + 1.236 + node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++; 1.237 + node->aabb.max = read_imagef(kdimg, kdsampler, tc); 1.238 + 1.239 + tc.x = 2 + MAX_NODE_FACES / 4; 1.240 + float4 pix = read_imagef(kdimg, kdsampler, tc); 1.241 + node->num_faces = (int)pix.x; 1.242 + node->left = (int)pix.y; 1.243 + node->right = (int)pix.z; 1.244 + 1.245 + tc.x = 2; 1.246 + for(int i=0; i<node->num_faces; i+=4) { 1.247 + float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++; 1.248 + node->face_idx[i] = (int)pix.x; 1.249 + node->face_idx[i + 1] = (int)pix.y; 1.250 + node->face_idx[i + 2] = (int)pix.z; 1.251 + node->face_idx[i + 3] = (int)pix.w; 1.252 + } 1.253 +}