clray

changeset 28:97cfd9675310

trying to pass the kdtree to the kernel
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 21 Aug 2010 03:42:49 +0100
parents 8b2f2ad14ae7
children 353d80127627
files rt.cl src/ocl.cc src/ocl.h src/rt.cc src/scene.cc src/scene.h
diffstat 6 files changed, 158 insertions(+), 36 deletions(-) [+]
line diff
     1.1 --- a/rt.cl	Tue Aug 17 20:35:00 2010 +0100
     1.2 +++ b/rt.cl	Sat Aug 21 03:42:49 2010 +0100
     1.3 @@ -5,6 +5,7 @@
     1.4  	int xsz, ysz;
     1.5  	int num_faces, num_lights;
     1.6  	int max_iter;
     1.7 +	int kd_depth;
     1.8  };
     1.9  
    1.10  struct Vertex {
    1.11 @@ -50,6 +51,18 @@
    1.12  	global const struct Light *lights;
    1.13  	int num_lights;
    1.14  	global const struct Material *matlib;
    1.15 +	global const struct KDNode *kdtree;
    1.16 +};
    1.17 +
    1.18 +struct AABBox {
    1.19 +	float4 min, max;
    1.20 +};
    1.21 +
    1.22 +struct KDNode {
    1.23 +	AABBox aabb;
    1.24 +	int face_idx[32];
    1.25 +	int num_faces;
    1.26 +	int padding[3];
    1.27  };
    1.28  
    1.29  #define MIN_ENERGY	0.001
    1.30 @@ -58,6 +71,7 @@
    1.31  float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp);
    1.32  bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp);
    1.33  bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
    1.34 +bool intersect_aabb(struct Ray ray, struct AABBox aabb);
    1.35  
    1.36  float4 reflect(float4 v, float4 n);
    1.37  float4 transform(float4 v, global const float *xform);
    1.38 @@ -72,7 +86,8 @@
    1.39  		global const struct Light *lights,
    1.40  		global const struct Ray *primrays,
    1.41  		global const float *xform,
    1.42 -		global const float *invtrans)
    1.43 +		global const float *invtrans,
    1.44 +		global const struct KDNode *kdtree)
    1.45  {
    1.46  	int idx = get_global_id(0);
    1.47  
    1.48 @@ -146,8 +161,12 @@
    1.49  	return dcol + scol;
    1.50  }
    1.51  
    1.52 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
    1.53 +{
    1.54 +	return false;
    1.55 +}
    1.56  
    1.57 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
    1.58 +/*bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
    1.59  {
    1.60  	struct SurfPoint sp, sp0;
    1.61  	sp0.t = 1.0;
    1.62 @@ -168,7 +187,7 @@
    1.63  		spres->mat = scn->matlib[sp0.obj->matid];
    1.64  	}
    1.65  	return true;
    1.66 -}
    1.67 +}*/
    1.68  
    1.69  bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
    1.70  {
    1.71 @@ -210,6 +229,44 @@
    1.72  	return true;
    1.73  }
    1.74  
    1.75 +bool intersect_aabb(struct Ray ray, struct AABBox aabb)
    1.76 +{
    1.77 +	if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
    1.78 +			ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
    1.79 +		return true;
    1.80 +	}
    1.81 +
    1.82 +	float4 bbox[2] = {aabb.min, aabb.max};
    1.83 +
    1.84 +	int xsign = (int)(ray.dir.x < 0.0);
    1.85 +	float invdirx = 1.0 / ray.dir.x;
    1.86 +	float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
    1.87 +	float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;
    1.88 +
    1.89 +	int ysign = (int)(ray.dir.y < 0.0);
    1.90 +	float invdiry = 1.0 / ray.dir.y;
    1.91 +	float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
    1.92 +	float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;
    1.93 +
    1.94 +	if(tmin > tymax || tymin > tmax) {
    1.95 +		return false;
    1.96 +	}
    1.97 +
    1.98 +	if(tymin > tmin) tmin = tymin;
    1.99 +	if(tymax < tmax) tmax = tymax;
   1.100 +
   1.101 +	int zsign = (int)(ray.dir.z < 0.0);
   1.102 +	float invdirz = 1.0 / ray.dir.z;
   1.103 +	float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
   1.104 +	float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;
   1.105 +
   1.106 +	if(tmin > tzmax || tzmin > tmax) {
   1.107 +		return false;
   1.108 +	}
   1.109 +
   1.110 +	return tmin < t1 && tmax > t0;
   1.111 +}
   1.112 +
   1.113  float4 reflect(float4 v, float4 n)
   1.114  {
   1.115  	return 2.0f * dot(v, n) * n - v;
     2.1 --- a/src/ocl.cc	Tue Aug 17 20:35:00 2010 +0100
     2.2 +++ b/src/ocl.cc	Sat Aug 21 03:42:49 2010 +0100
     2.3 @@ -74,7 +74,7 @@
     2.4  }
     2.5  
     2.6  
     2.7 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
     2.8 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
     2.9  {
    2.10  	int err;
    2.11  	cl_mem mem;
    2.12 @@ -85,7 +85,7 @@
    2.13  	}
    2.14  
    2.15  
    2.16 -	if(!(mem = clCreateBuffer(ctx, flags, sz, buf, &err))) {
    2.17 +	if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
    2.18  		fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
    2.19  		return 0;
    2.20  	}
    2.21 @@ -131,7 +131,7 @@
    2.22  	mbuf->ptr = 0;
    2.23  }
    2.24  
    2.25 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
    2.26 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src)
    2.27  {
    2.28  	if(!mbuf) return false;
    2.29  
    2.30 @@ -243,7 +243,7 @@
    2.31  	return true;
    2.32  }
    2.33  
    2.34 -bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
    2.35 +bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
    2.36  {
    2.37  	printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
    2.38  	CLMemBuffer *buf;
     3.1 --- a/src/ocl.h	Tue Aug 17 20:35:00 2010 +0100
     3.2 +++ b/src/ocl.h	Sat Aug 21 03:42:49 2010 +0100
     3.3 @@ -27,13 +27,13 @@
     3.4  	void *ptr;
     3.5  };
     3.6  
     3.7 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf = 0);
     3.8 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf = 0);
     3.9  void destroy_mem_buffer(CLMemBuffer *mbuf);
    3.10  
    3.11  void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr);
    3.12  void unmap_mem_buffer(CLMemBuffer *mbuf);
    3.13  
    3.14 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src);
    3.15 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src);
    3.16  bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest);
    3.17  
    3.18  enum {
    3.19 @@ -74,7 +74,7 @@
    3.20  
    3.21  	bool set_argi(int arg, int val);
    3.22  	bool set_argf(int arg, float val);
    3.23 -	bool set_arg_buffer(int arg, int rdwr, size_t sz, void *buf = 0);
    3.24 +	bool set_arg_buffer(int arg, int rdwr, size_t sz, const void *buf = 0);
    3.25  	CLMemBuffer *get_arg_buffer(int arg);
    3.26  	int get_num_args() const;
    3.27  
     4.1 --- a/src/rt.cc	Tue Aug 17 20:35:00 2010 +0100
     4.2 +++ b/src/rt.cc	Sat Aug 21 03:42:49 2010 +0100
     4.3 @@ -16,6 +16,7 @@
     4.4  	KARG_PRIM_RAYS,
     4.5  	KARG_XFORM,
     4.6  	KARG_INVTRANS_XFORM,
     4.7 +	KARG_KDTREE,
     4.8  
     4.9  	NUM_KERNEL_ARGS
    4.10  };
    4.11 @@ -25,6 +26,7 @@
    4.12  	int xsz, ysz;
    4.13  	int num_faces, num_lights;
    4.14  	int max_iter;
    4.15 +	int kd_depth;
    4.16  };
    4.17  
    4.18  struct Ray {
    4.19 @@ -61,6 +63,7 @@
    4.20  	rinf.num_faces = scn->get_num_faces();
    4.21  	rinf.num_lights = sizeof lightlist / sizeof *lightlist;
    4.22  	rinf.max_iter = 6;
    4.23 +	rinf.kd_depth = kdtree_depth(scn->kdtree);
    4.24  
    4.25  	/* calculate primary rays */
    4.26  	prim_rays = new Ray[xsz * ysz];
    4.27 @@ -82,6 +85,13 @@
    4.28  		return false;
    4.29  	}
    4.30  
    4.31 +	const KDNodeGPU *kdbuf = scn->get_kdtree_buffer();
    4.32 +	if(!kdbuf) {
    4.33 +		fprintf(stderr, "failed to create kdtree buffer\n");
    4.34 +		return false;
    4.35 +	}
    4.36 +	int num_kdnodes = scn->get_num_kdnodes();
    4.37 +
    4.38  	/* setup argument buffers */
    4.39  	prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float));
    4.40  	prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
    4.41 @@ -91,6 +101,7 @@
    4.42  	prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
    4.43  	prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
    4.44  	prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
    4.45 +	prog->set_arg_buffer(KARG_KDTREE, ARG_RD, num_kdnodes * sizeof *kdbuf, kdbuf);
    4.46  
    4.47  	if(prog->get_num_args() < NUM_KERNEL_ARGS) {
    4.48  		return false;
     5.1 --- a/src/scene.cc	Tue Aug 17 20:35:00 2010 +0100
     5.2 +++ b/src/scene.cc	Sat Aug 21 03:42:49 2010 +0100
     5.3 @@ -9,12 +9,12 @@
     5.4  static bool build_kdtree(KDNode *kd, int level = 0);
     5.5  static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea = 1.0);
     5.6  static void free_kdtree(KDNode *node);
     5.7 -static int kdtree_depth(const KDNode *node);
     5.8 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf);
     5.9  static void print_item_counts(const KDNode *node, int level);
    5.10  
    5.11  
    5.12  static int accel_param[NUM_ACCEL_PARAMS] = {
    5.13 -	75,	// max tree depth
    5.14 +	40,	// max tree depth
    5.15  	0,	// max items per node (0 means ignore limit)
    5.16  	5,	// estimated traversal cost
    5.17  	15	// estimated interseciton cost
    5.18 @@ -70,11 +70,15 @@
    5.19  	facebuf = 0;
    5.20  	num_faces = -1;
    5.21  	kdtree = 0;
    5.22 +	num_kdnodes = -1;
    5.23 +	kdbuf = 0;
    5.24  }
    5.25  
    5.26  Scene::~Scene()
    5.27  {
    5.28  	delete [] facebuf;
    5.29 +	delete [] kdbuf;
    5.30 +	free_kdtree(kdtree);
    5.31  }
    5.32  
    5.33  bool Scene::add_mesh(Mesh *m)
    5.34 @@ -158,6 +162,35 @@
    5.35  	return facebuf;
    5.36  }
    5.37  
    5.38 +const KDNodeGPU *Scene::get_kdtree_buffer() const
    5.39 +{
    5.40 +	if(kdbuf) {
    5.41 +		return kdbuf;
    5.42 +	}
    5.43 +
    5.44 +	if(!kdtree) {
    5.45 +		((Scene*)this)->build_kdtree();
    5.46 +	}
    5.47 +
    5.48 +	if(!get_num_kdnodes()) {
    5.49 +		return 0;
    5.50 +	}
    5.51 +
    5.52 +	kdbuf = new KDNodeGPU[num_kdnodes + 1];
    5.53 +	kdtree_gpu_flatten(kdbuf, 1, kdtree, get_face_buffer());
    5.54 +	return kdbuf;
    5.55 +}
    5.56 +
    5.57 +int Scene::get_num_kdnodes() const
    5.58 +{
    5.59 +	if(num_kdnodes >= 0) {
    5.60 +		return num_kdnodes;
    5.61 +	}
    5.62 +
    5.63 +	num_kdnodes = kdtree_nodes(kdtree);
    5.64 +	return num_kdnodes;
    5.65 +}
    5.66 +
    5.67  
    5.68  void Scene::draw_kdtree() const
    5.69  {
    5.70 @@ -175,22 +208,10 @@
    5.71  
    5.72  static float palette[][3] = {
    5.73  	{0, 1, 0},
    5.74 -	{0, 1, 0},
    5.75 -	{0, 1, 0},
    5.76 -	{1, 0, 0},
    5.77 -	{1, 0, 0},
    5.78  	{1, 0, 0},
    5.79  	{0, 0, 1},
    5.80 -	{0, 0, 1},
    5.81 -	{0, 0, 1},
    5.82 -	{1, 1, 0},
    5.83 -	{1, 1, 0},
    5.84  	{1, 1, 0},
    5.85  	{0, 0, 1},
    5.86 -	{0, 0, 1},
    5.87 -	{0, 0, 1},
    5.88 -	{1, 0, 1},
    5.89 -	{1, 0, 1},
    5.90  	{1, 0, 1}
    5.91  };
    5.92  static int pal_size = sizeof palette / sizeof *palette;
    5.93 @@ -291,10 +312,11 @@
    5.94  
    5.95  static bool build_kdtree(KDNode *kd, int level)
    5.96  {
    5.97 +	int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH];
    5.98  	int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS];
    5.99  	int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
   5.100  
   5.101 -	if(kd->num_faces == 0) {
   5.102 +	if(kd->num_faces == 0 || level >= opt_max_depth) {
   5.103  		return true;
   5.104  	}
   5.105  
   5.106 @@ -410,7 +432,7 @@
   5.107  	}
   5.108  }
   5.109  
   5.110 -static int kdtree_depth(const KDNode *node)
   5.111 +int kdtree_depth(const KDNode *node)
   5.112  {
   5.113  	if(!node) return 0;
   5.114  
   5.115 @@ -419,6 +441,36 @@
   5.116  	return (left > right ? left : right) + 1;
   5.117  }
   5.118  
   5.119 +int kdtree_nodes(const KDNode *node)
   5.120 +{
   5.121 +	if(!node) return 0;
   5.122 +	return kdtree_nodes(node->left) + kdtree_nodes(node->right) + 1;
   5.123 +}
   5.124 +
   5.125 +#define MAX_FACES	(sizeof dest->face_idx / sizeof *dest->face_idx)
   5.126 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf)
   5.127 +{
   5.128 +	KDNodeGPU *dest = kdbuf + idx;
   5.129 +
   5.130 +	dest->aabb = node->aabb;
   5.131 +	dest->num_faces = 0;
   5.132 +
   5.133 +	std::list<const Face*>::const_iterator it = node->faces.begin();
   5.134 +	while(it != node->faces.end()) {
   5.135 +		if(dest->num_faces >= (int)MAX_FACES) {
   5.136 +			fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES);
   5.137 +			break;
   5.138 +		}
   5.139 +		dest->face_idx[dest->num_faces++] = *it - facebuf;
   5.140 +	}
   5.141 +
   5.142 +	if(node->left) {
   5.143 +		assert(node->right);
   5.144 +		kdtree_gpu_flatten(kdbuf, idx * 2, node->left, facebuf);
   5.145 +		kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right, facebuf);
   5.146 +	}
   5.147 +}
   5.148 +
   5.149  static void print_item_counts(const KDNode *node, int level)
   5.150  {
   5.151  	if(!node) return;
     6.1 --- a/src/scene.h	Tue Aug 17 20:35:00 2010 +0100
     6.2 +++ b/src/scene.h	Sat Aug 21 03:42:49 2010 +0100
     6.3 @@ -46,12 +46,6 @@
     6.4  	KDAXIS_Z
     6.5  };
     6.6  
     6.7 -#define KDCLEAR(node)	((node)->axis = -1)
     6.8 -#define KDUSED(node)	((node)->axis >= 0)
     6.9 -#define KDPARENT(x)		((x) >> 1)
    6.10 -#define KDLEFT(x)		((x) << 1)
    6.11 -#define KDRIGHT(x)		(((x) << 1) + 1)
    6.12 -
    6.13  struct KDNode {
    6.14  	int axis;
    6.15  	float pt;
    6.16 @@ -66,8 +60,10 @@
    6.17  };
    6.18  
    6.19  struct KDNodeGPU {
    6.20 -	int axis;
    6.21 -	float pt;
    6.22 +	AABBox aabb;
    6.23 +	int face_idx[32];
    6.24 +	int num_faces;
    6.25 +	int padding[3];
    6.26  };
    6.27  
    6.28  
    6.29 @@ -76,12 +72,13 @@
    6.30  	mutable Face *facebuf;
    6.31  	mutable int num_faces;
    6.32  
    6.33 +	mutable KDNodeGPU *kdbuf;
    6.34 +	mutable int num_kdnodes;
    6.35 +
    6.36  public:
    6.37  	std::vector<Mesh*> meshes;
    6.38  	std::vector<Material> matlib;
    6.39 -
    6.40  	KDNode *kdtree;
    6.41 -	std::vector<KDNode> kdtree_gpu;
    6.42  
    6.43  	Scene();
    6.44  	~Scene();
    6.45 @@ -98,6 +95,8 @@
    6.46  	bool load(FILE *fp);
    6.47  
    6.48  	const Face *get_face_buffer() const;
    6.49 +	const KDNodeGPU *get_kdtree_buffer() const;
    6.50 +	int get_num_kdnodes() const;
    6.51  
    6.52  	void draw_kdtree() const;
    6.53  	bool build_kdtree();
    6.54 @@ -114,4 +113,7 @@
    6.55  
    6.56  void set_accel_param(int p, int v);
    6.57  
    6.58 +int kdtree_depth(const KDNode *tree);
    6.59 +int kdtree_nodes(const KDNode *tree);
    6.60 +
    6.61  #endif	/* MESH_H_ */