clray

changeset 45:8047637961a2

Fixed the issue of hitting the maximum vertical image size for large kd-trees: nodes are now laid out in multiple columns of at most KDIMG_MAX_HEIGHT rows each.
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 29 Aug 2010 04:20:42 +0100 (2010-08-29)
parents e7f79c6ad246
children b5eb404af481
files Makefile rt.cl src/ocl.cc src/ocl.h src/rt.cc src/scene.h
diffstat 6 files changed, 29 insertions(+), 35 deletions(-) [+]
line diff
     1.1 --- a/Makefile	Sat Aug 28 21:50:17 2010 +0100
     1.2 +++ b/Makefile	Sun Aug 29 04:20:42 2010 +0100
     1.3 @@ -3,15 +3,18 @@
     1.4  dep = $(obj:.o=.d)
     1.5  bin = test
     1.6  
     1.7 +opt = -O3 -ffast-math
     1.8 +dbg = -g
     1.9 +
    1.10  CXX = g++
    1.11 -CXXFLAGS = -pedantic -Wall -g $(def)
    1.12 +CXXFLAGS = -pedantic -Wall $(dbg) $(opt) $(def)
    1.13  LDFLAGS = $(libgl) $(libcl) -lpthread
    1.14  
    1.15  ifeq ($(shell uname -s), Darwin)
    1.16  	libgl = -framework OpenGL -framework GLUT
    1.17  	libcl = -framework OpenCL
    1.18  else
    1.19 -	libgl = -lGL -lglut
    1.20 +	libgl = -lGL -lGLU -lglut
    1.21  	libcl = -lOpenCL
    1.22  	def = -DCLGL_INTEROP
    1.23  endif
     2.1 --- a/rt.cl	Sat Aug 28 21:50:17 2010 +0100
     2.2 +++ b/rt.cl	Sun Aug 29 04:20:42 2010 +0100
     2.3 @@ -1,4 +1,5 @@
     2.4  /* vim: set ft=opencl:ts=4:sw=4 */
     2.5 +#include "common.h"
     2.6  
     2.7  struct RendInfo {
     2.8  	float4 ambient;
     2.9 @@ -58,7 +59,6 @@
    2.10  	float4 min, max;
    2.11  };
    2.12  
    2.13 -#define MAX_NODE_FACES	32
    2.14  struct KDNode {
    2.15  	struct AABBox aabb;
    2.16  	int face_idx[MAX_NODE_FACES];
    2.17 @@ -67,7 +67,6 @@
    2.18  	int padding;
    2.19  };
    2.20  
    2.21 -#define RAY_MAG		500.0
    2.22  #define MIN_ENERGY	0.001
    2.23  #define EPSILON		1e-5
    2.24  
    2.25 @@ -176,7 +175,7 @@
    2.26  	return dcol + scol;
    2.27  }
    2.28  
    2.29 -#define STACK_SIZE	64
    2.30 +#define STACK_SIZE	MAX_TREE_DEPTH
    2.31  bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg)
    2.32  {
    2.33  	struct SurfPoint sp0;
    2.34 @@ -369,20 +368,22 @@
    2.35  // read a KD-tree node from a texture scanline
    2.36  void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg)
    2.37  {
    2.38 +	int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT);
    2.39 +
    2.40  	int2 tc;
    2.41 -	tc.x = 0;
    2.42 -	tc.y = idx;
    2.43 +	tc.x = startx;
    2.44 +	tc.y = idx % KDIMG_MAX_HEIGHT;
    2.45  
    2.46  	node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++;
    2.47  	node->aabb.max = read_imagef(kdimg, kdsampler, tc);
    2.48  
    2.49 -	tc.x = 2 + MAX_NODE_FACES / 4;
    2.50 +	tc.x = startx + 2 + MAX_NODE_FACES / 4;
    2.51  	float4 pix = read_imagef(kdimg, kdsampler, tc);
    2.52  	node->num_faces = (int)pix.x;
    2.53  	node->left = (int)pix.y;
    2.54  	node->right = (int)pix.z;
    2.55  
    2.56 -	tc.x = 2;
    2.57 +	tc.x = startx + 2;
    2.58  	for(int i=0; i<node->num_faces; i+=4) {
    2.59  		float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++;
    2.60  		node->face_idx[i] = (int)pix.x;
     3.1 --- a/src/ocl.cc	Sat Aug 28 21:50:17 2010 +0100
     3.2 +++ b/src/ocl.cc	Sun Aug 29 04:20:42 2010 +0100
     3.3 @@ -61,8 +61,6 @@
     3.4  		return false;
     3.5  	}
     3.6  
     3.7 -
     3.8 -
     3.9  #ifndef CLGL_INTEROP
    3.10  	cl_context_properties *prop = 0;
    3.11  #else
    3.12 @@ -499,12 +497,9 @@
    3.13  	return num_args;
    3.14  }
    3.15  
    3.16 -bool CLProgram::build()
    3.17 +bool CLProgram::build(const char *opt)
    3.18  {
    3.19  	int err;
    3.20 -
    3.21 -	const char *opt = "-cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
    3.22 -
    3.23  	if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) {
    3.24  		size_t sz;
    3.25  		clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
     4.1 --- a/src/ocl.h	Sat Aug 28 21:50:17 2010 +0100
     4.2 +++ b/src/ocl.h	Sun Aug 29 04:20:42 2010 +0100
     4.3 @@ -103,7 +103,7 @@
     4.4  	CLMemBuffer *get_arg_buffer(int arg);
     4.5  	int get_num_args() const;
     4.6  
     4.7 -	bool build();
     4.8 +	bool build(const char *opt = 0);
     4.9  
    4.10  	bool run() const;
    4.11  	bool run(int dim, ...) const;
     5.1 --- a/src/rt.cc	Sat Aug 28 21:50:17 2010 +0100
     5.2 +++ b/src/rt.cc	Sun Aug 29 04:20:42 2010 +0100
     5.3 @@ -6,6 +6,7 @@
     5.4  #include "ocl.h"
     5.5  #include "scene.h"
     5.6  #include "timer.h"
     5.7 +#include "common.h"
     5.8  
     5.9  // kernel arguments
    5.10  enum {
    5.11 @@ -122,7 +123,8 @@
    5.12  		return false;
    5.13  	}
    5.14  
    5.15 -	if(!prog->build()) {
    5.16 +	const char *opt = "-Isrc -cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
    5.17 +	if(!prog->build(opt)) {
    5.18  		return false;
    5.19  	}
    5.20  
    5.21 @@ -291,29 +293,19 @@
    5.22  
    5.23  	float mag = sqrt(px * px + py * py + pz * pz);
    5.24  
    5.25 -	px = px * 500.0 / mag;
    5.26 -	py = py * 500.0 / mag;
    5.27 -	pz = pz * 500.0 / mag;
    5.28 +	px = px * RAY_MAG / mag;
    5.29 +	py = py * RAY_MAG / mag;
    5.30 +	pz = pz * RAY_MAG / mag;
    5.31  
    5.32  	Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
    5.33  	return ray;
    5.34  }
    5.35  
    5.36 -static int next_pow2(int x)
    5.37 -{
    5.38 -	x--;
    5.39 -	x = (x >> 1) | x;
    5.40 -	x = (x >> 2) | x;
    5.41 -	x = (x >> 4) | x;
    5.42 -	x = (x >> 8) | x;
    5.43 -	x = (x >> 16) | x;
    5.44 -	return x + 1;
    5.45 -}
    5.46 -
    5.47  static float *create_kdimage(const KDNodeGPU *kdtree, int num_nodes, int *xsz_ret, int *ysz_ret)
    5.48  {
    5.49 -	int xsz = 16;
    5.50 -	int ysz = next_pow2(num_nodes);
    5.51 +	int ysz = MIN(num_nodes, KDIMG_MAX_HEIGHT);
    5.52 +	int columns = (num_nodes - 1) / KDIMG_MAX_HEIGHT + 1;
    5.53 +	int xsz = KDIMG_NODE_WIDTH * columns;
    5.54  
    5.55  	printf("creating kdtree image %dx%d (%d nodes)\n", xsz, ysz, num_nodes);
    5.56  
    5.57 @@ -321,7 +313,10 @@
    5.58  	memset(img, 0, 4 * xsz * ysz * sizeof *img);
    5.59  
    5.60  	for(int i=0; i<num_nodes; i++) {
    5.61 -		float *ptr = img + i * 4 * xsz;
    5.62 +		int x = KDIMG_NODE_WIDTH * (i / KDIMG_MAX_HEIGHT);
    5.63 +		int y = i % KDIMG_MAX_HEIGHT;
    5.64 +
    5.65 +		float *ptr = img + (y * xsz + x) * 4;
    5.66  
    5.67  		*ptr++ = kdtree[i].aabb.min[0];
    5.68  		*ptr++ = kdtree[i].aabb.min[1];
     6.1 --- a/src/scene.h	Sat Aug 28 21:50:17 2010 +0100
     6.2 +++ b/src/scene.h	Sun Aug 29 04:20:42 2010 +0100
     6.3 @@ -4,6 +4,7 @@
     6.4  #include <stdio.h>
     6.5  #include <vector>
     6.6  #include <list>
     6.7 +#include "common.h"
     6.8  
     6.9  struct Vertex {
    6.10  	float pos[4];
    6.11 @@ -51,7 +52,6 @@
    6.12  	KDNode();
    6.13  };
    6.14  
    6.15 -#define MAX_NODE_FACES	32
    6.16  struct KDNodeGPU {
    6.17  	AABBox aabb;
    6.18  	int face_idx[MAX_NODE_FACES];