# HG changeset patch # User John Tsiombikas # Date 1283052042 -3600 # Node ID 8047637961a251e02cc1a41f374506409ca92d6a # Parent e7f79c6ad246256bfe3745cd40374715a12c00ec fixed the issue of hitting maximum vertical image sizes for large kdtrees diff -r e7f79c6ad246 -r 8047637961a2 Makefile --- a/Makefile Sat Aug 28 21:50:17 2010 +0100 +++ b/Makefile Sun Aug 29 04:20:42 2010 +0100 @@ -3,15 +3,18 @@ dep = $(obj:.o=.d) bin = test +opt = -O3 -ffast-math +dbg = -g + CXX = g++ -CXXFLAGS = -pedantic -Wall -g $(def) +CXXFLAGS = -pedantic -Wall $(dbg) $(opt) $(def) LDFLAGS = $(libgl) $(libcl) -lpthread ifeq ($(shell uname -s), Darwin) libgl = -framework OpenGL -framework GLUT libcl = -framework OpenCL else - libgl = -lGL -lglut + libgl = -lGL -lGLU -lglut libcl = -lOpenCL def = -DCLGL_INTEROP endif diff -r e7f79c6ad246 -r 8047637961a2 rt.cl --- a/rt.cl Sat Aug 28 21:50:17 2010 +0100 +++ b/rt.cl Sun Aug 29 04:20:42 2010 +0100 @@ -1,4 +1,5 @@ /* vim: set ft=opencl:ts=4:sw=4 */ +#include "common.h" struct RendInfo { float4 ambient; @@ -58,7 +59,6 @@ float4 min, max; }; -#define MAX_NODE_FACES 32 struct KDNode { struct AABBox aabb; int face_idx[MAX_NODE_FACES]; @@ -67,7 +67,6 @@ int padding; }; -#define RAY_MAG 500.0 #define MIN_ENERGY 0.001 #define EPSILON 1e-5 @@ -176,7 +175,7 @@ return dcol + scol; } -#define STACK_SIZE 64 +#define STACK_SIZE MAX_TREE_DEPTH bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg) { struct SurfPoint sp0; @@ -369,20 +368,22 @@ // read a KD-tree node from a texture scanline void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg) { + int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT); + int2 tc; - tc.x = 0; - tc.y = idx; + tc.x = startx; + tc.y = idx % KDIMG_MAX_HEIGHT; node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++; node->aabb.max = read_imagef(kdimg, kdsampler, tc); - tc.x = 2 + MAX_NODE_FACES / 4; + tc.x = startx + 2 + MAX_NODE_FACES / 4; float4 pix = read_imagef(kdimg, kdsampler, tc); node->num_faces = (int)pix.x; node->left = (int)pix.y; node->right = (int)pix.z; - tc.x = 2; + tc.x = startx + 2; for(int i=0; inum_faces; i+=4) { float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++; node->face_idx[i] = (int)pix.x; diff -r e7f79c6ad246 -r 8047637961a2 src/ocl.cc --- a/src/ocl.cc Sat Aug 28 21:50:17 2010 +0100 +++ b/src/ocl.cc Sun Aug 29 04:20:42 2010 +0100 @@ -61,8 +61,6 @@ return false; } - - #ifndef CLGL_INTEROP cl_context_properties *prop = 0; #else @@ -499,12 +497,9 @@ return num_args; } -bool CLProgram::build() +bool CLProgram::build(const char *opt) { int err; - - const char *opt = "-cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; - if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) { size_t sz; clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz); diff -r e7f79c6ad246 -r 8047637961a2 src/ocl.h --- a/src/ocl.h Sat Aug 28 21:50:17 2010 +0100 +++ b/src/ocl.h Sun Aug 29 04:20:42 2010 +0100 @@ -103,7 +103,7 @@ CLMemBuffer *get_arg_buffer(int arg); int get_num_args() const; - bool build(); + bool build(const char *opt = 0); bool run() const; bool run(int dim, ...) const; diff -r e7f79c6ad246 -r 8047637961a2 src/rt.cc --- a/src/rt.cc Sat Aug 28 21:50:17 2010 +0100 +++ b/src/rt.cc Sun Aug 29 04:20:42 2010 +0100 @@ -6,6 +6,7 @@ #include "ocl.h" #include "scene.h" #include "timer.h" +#include "common.h" // kernel arguments enum { @@ -122,7 +123,8 @@ return false; } - if(!prog->build()) { + const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math"; + if(!prog->build(opt)) { return false; } @@ -291,29 +293,19 @@ float mag = sqrt(px * px + py * py + pz * pz); - px = px * 500.0 / mag; - py = py * 500.0 / mag; - pz = pz * 500.0 / mag; + px = px * RAY_MAG / mag; + py = py * RAY_MAG / mag; + pz = pz * RAY_MAG / mag; Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}}; return ray; } -static int next_pow2(int x) -{ - x--; - x = (x >> 1) | x; - x = (x >> 2) | x; - x = (x >> 4) | x; - x = (x >> 8) | x; - x = (x >> 16) | x; - return x + 1; -} - static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret) { - int xsz = 16; - int ysz = next_pow2(num_nodes); + int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT); + int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1; + int xsz = KDIMG_NODE_WIDTH * columns; printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes); @@ -321,7 +313,10 @@ memset(img, 0, 4 * xsz * ysz * sizeof *img); for(int i=0; i #include #include +#include "common.h" struct Vertex { float pos[4]; @@ -51,7 +52,6 @@ KDNode(); }; -#define MAX_NODE_FACES 32 struct KDNodeGPU { AABBox aabb; int face_idx[MAX_NODE_FACES];