clray
changeset 45:8047637961a2
fixed the issue of hitting maximum vertical image sizes for large kdtrees
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 29 Aug 2010 04:20:42 +0100 |
parents | e7f79c6ad246 |
children | b5eb404af481 |
files | Makefile rt.cl src/ocl.cc src/ocl.h src/rt.cc src/scene.h |
diffstat | 6 files changed, 29 insertions(+), 35 deletions(-) [+] |
line diff
1.1 --- a/Makefile Sat Aug 28 21:50:17 2010 +0100 1.2 +++ b/Makefile Sun Aug 29 04:20:42 2010 +0100 1.3 @@ -3,15 +3,18 @@ 1.4 dep = $(obj:.o=.d) 1.5 bin = test 1.6 1.7 +opt = -O3 -ffast-math 1.8 +dbg = -g 1.9 + 1.10 CXX = g++ 1.11 -CXXFLAGS = -pedantic -Wall -g $(def) 1.12 +CXXFLAGS = -pedantic -Wall $(dbg) $(opt) $(def) 1.13 LDFLAGS = $(libgl) $(libcl) -lpthread 1.14 1.15 ifeq ($(shell uname -s), Darwin) 1.16 libgl = -framework OpenGL -framework GLUT 1.17 libcl = -framework OpenCL 1.18 else 1.19 - libgl = -lGL -lglut 1.20 + libgl = -lGL -lGLU -lglut 1.21 libcl = -lOpenCL 1.22 def = -DCLGL_INTEROP 1.23 endif
2.1 --- a/rt.cl Sat Aug 28 21:50:17 2010 +0100 2.2 +++ b/rt.cl Sun Aug 29 04:20:42 2010 +0100 2.3 @@ -1,4 +1,5 @@ 2.4 /* vim: set ft=opencl:ts=4:sw=4 */ 2.5 +#include "common.h" 2.6 2.7 struct RendInfo { 2.8 float4 ambient; 2.9 @@ -58,7 +59,6 @@ 2.10 float4 min, max; 2.11 }; 2.12 2.13 -#define MAX_NODE_FACES 32 2.14 struct KDNode { 2.15 struct AABBox aabb; 2.16 int face_idx[MAX_NODE_FACES]; 2.17 @@ -67,7 +67,6 @@ 2.18 int padding; 2.19 }; 2.20 2.21 -#define RAY_MAG 500.0 2.22 #define MIN_ENERGY 0.001 2.23 #define EPSILON 1e-5 2.24 2.25 @@ -176,7 +175,7 @@ 2.26 return dcol + scol; 2.27 } 2.28 2.29 -#define STACK_SIZE 64 2.30 +#define STACK_SIZE MAX_TREE_DEPTH 2.31 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg) 2.32 { 2.33 struct SurfPoint sp0; 2.34 @@ -369,20 +368,22 @@ 2.35 // read a KD-tree node from a texture scanline 2.36 void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg) 2.37 { 2.38 + int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT); 2.39 + 2.40 int2 tc; 2.41 - tc.x = 0; 2.42 - tc.y = idx; 2.43 + tc.x = startx; 2.44 + tc.y = idx % KDIMG_MAX_HEIGHT; 2.45 2.46 node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++; 2.47 node->aabb.max = read_imagef(kdimg, kdsampler, tc); 2.48 2.49 - tc.x = 2 + MAX_NODE_FACES / 4; 2.50 + tc.x = startx + 2 + MAX_NODE_FACES / 4; 2.51 float4 pix = read_imagef(kdimg, kdsampler, tc); 2.52 node->num_faces = (int)pix.x; 2.53 node->left = (int)pix.y; 2.54 node->right = (int)pix.z; 2.55 2.56 - tc.x = 2; 2.57 + tc.x = startx + 2; 2.58 for(int i=0; i<node->num_faces; i+=4) { 2.59 float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++; 2.60 node->face_idx[i] = (int)pix.x;
3.1 --- a/src/ocl.cc Sat Aug 28 21:50:17 2010 +0100 3.2 +++ b/src/ocl.cc Sun Aug 29 04:20:42 2010 +0100 3.3 @@ -61,8 +61,6 @@ 3.4 return false; 3.5 } 3.6 3.7 - 3.8 - 3.9 #ifndef CLGL_INTEROP 3.10 cl_context_properties *prop = 0; 3.11 #else 3.12 @@ -499,12 +497,9 @@ 3.13 return num_args; 3.14 } 3.15 3.16 -bool CLProgram::build() 3.17 +bool CLProgram::build(const char *opt) 3.18 { 3.19 int err; 3.20 - 3.21 - const char *opt = "-cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; 3.22 - 3.23 if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) { 3.24 size_t sz; 3.25 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
4.1 --- a/src/ocl.h Sat Aug 28 21:50:17 2010 +0100 4.2 +++ b/src/ocl.h Sun Aug 29 04:20:42 2010 +0100 4.3 @@ -103,7 +103,7 @@ 4.4 CLMemBuffer *get_arg_buffer(int arg); 4.5 int get_num_args() const; 4.6 4.7 - bool build(); 4.8 + bool build(const char *opt = 0); 4.9 4.10 bool run() const; 4.11 bool run(int dim, ...) const;
5.1 --- a/src/rt.cc Sat Aug 28 21:50:17 2010 +0100 5.2 +++ b/src/rt.cc Sun Aug 29 04:20:42 2010 +0100 5.3 @@ -6,6 +6,7 @@ 5.4 #include "ocl.h" 5.5 #include "scene.h" 5.6 #include "timer.h" 5.7 +#include "common.h" 5.8 5.9 // kernel arguments 5.10 enum { 5.11 @@ -122,7 +123,8 @@ 5.12 return false; 5.13 } 5.14 5.15 - if(!prog->build()) { 5.16 + const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; 5.17 + if(!prog->build(opt)) { 5.18 return false; 5.19 } 5.20 5.21 @@ -291,29 +293,19 @@ 5.22 5.23 float mag = sqrt(px * px + py * py + pz * pz); 5.24 5.25 - px = px * 500.0 / mag; 5.26 - py = py * 500.0 / mag; 5.27 - pz = pz * 500.0 / mag; 5.28 + px = px * RAY_MAG / mag; 5.29 + py = py * RAY_MAG / mag; 5.30 + pz = pz * RAY_MAG / mag; 5.31 5.32 Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}}; 5.33 return ray; 5.34 } 5.35 5.36 -static int next_pow2(int x) 5.37 -{ 5.38 - x--; 5.39 - x = (x >> 1) | x; 5.40 - x = (x >> 2) | x; 5.41 - x = (x >> 4) | x; 5.42 - x = (x >> 8) | x; 5.43 - x = (x >> 16) | x; 5.44 - return x + 1; 5.45 -} 5.46 - 5.47 static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret) 5.48 { 5.49 - int xsz = 16; 5.50 - int ysz = next_pow2(num_nodes); 5.51 + int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT); 5.52 + int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1; 5.53 + int xsz = KDIMG_NODE_WIDTH * columns; 5.54 5.55 printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes); 5.56 5.57 @@ -321,7 +313,10 @@ 5.58 memset(img, 0, 4 * xsz * ysz * sizeof *img); 5.59 5.60 for(int i=0; i<num_nodes; i++) { 5.61 - float *ptr = img + i * 4 * xsz; 5.62 + int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT); 5.63 + int y = i % KDIMG_MAX_HEIGHT; 5.64 + 5.65 + float *ptr = img + (y * xsz + x) * 4; 5.66 5.67 *ptr++ = kdtree[i].aabb.min[0]; 5.68 *ptr++ = kdtree[i].aabb.min[1];
6.1 --- a/src/scene.h Sat Aug 28 21:50:17 2010 +0100 6.2 +++ b/src/scene.h Sun Aug 29 04:20:42 2010 +0100 6.3 @@ -4,6 +4,7 @@ 6.4 #include <stdio.h> 6.5 #include <vector> 6.6 #include <list> 6.7 +#include "common.h" 6.8 6.9 struct Vertex { 6.10 float pos[4]; 6.11 @@ -51,7 +52,6 @@ 6.12 KDNode(); 6.13 }; 6.14 6.15 -#define MAX_NODE_FACES 32 6.16 struct KDNodeGPU { 6.17 AABBox aabb; 6.18 int face_idx[MAX_NODE_FACES];