clray
diff src/rt.cc @ 45:8047637961a2
fixed the issue of hitting maximum vertical image sizes for large kdtrees
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 29 Aug 2010 04:20:42 +0100 |
parents | f9eec11e5acc |
children | 30bf84881553 |
line diff
```diff
--- a/src/rt.cc	Sat Aug 28 21:50:17 2010 +0100
+++ b/src/rt.cc	Sun Aug 29 04:20:42 2010 +0100
@@ -6,6 +6,7 @@
 #include "ocl.h"
 #include "scene.h"
 #include "timer.h"
+#include "common.h"
 
 // kernel arguments
 enum {
@@ -122,7 +123,8 @@
 		return false;
 	}
 
-	if(!prog->build()) {
+	const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
+	if(!prog->build(opt)) {
 		return false;
 	}
 
@@ -291,29 +293,19 @@
 
 	float mag = sqrt(px * px + py * py + pz * pz);
 
-	px = px * 500.0 / mag;
-	py = py * 500.0 / mag;
-	pz = pz * 500.0 / mag;
+	px = px * RAY_MAG / mag;
+	py = py * RAY_MAG / mag;
+	pz = pz * RAY_MAG / mag;
 
 	Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
 	return ray;
 }
 
-static int next_pow2(int x)
-{
-	x--;
-	x = (x >> 1) | x;
-	x = (x >> 2) | x;
-	x = (x >> 4) | x;
-	x = (x >> 8) | x;
-	x = (x >> 16) | x;
-	return x + 1;
-}
-
 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
 {
-	int xsz = 16;
-	int ysz = next_pow2(num_nodes);
+	int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
+	int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
+	int xsz = KDIMG_NODE_WIDTH * columns;
 
 	printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
 
@@ -321,7 +313,10 @@
 	memset(img, 0, 4 * xsz * ysz * sizeof *img);
 
 	for(int i=0; i<num_nodes; i++) {
-		float *ptr = img + i * 4 * xsz;
+		int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
+		int y = i % KDIMG_MAX_HEIGHT;
+
+		float *ptr = img + (y * xsz + x) * 4;
 
 		*ptr++ = kdtree[i].aabb.min[0];
 		*ptr++ = kdtree[i].aabb.min[1];
```