clray
changeset 28:97cfd9675310
trying to pass the kdtree to the kernel
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 21 Aug 2010 03:42:49 +0100 |
parents | 8b2f2ad14ae7 |
children | 353d80127627 |
files | rt.cl src/ocl.cc src/ocl.h src/rt.cc src/scene.cc src/scene.h |
diffstat | 6 files changed, 158 insertions(+), 36 deletions(-) [+] |
line diff
1.1 --- a/rt.cl Tue Aug 17 20:35:00 2010 +0100 1.2 +++ b/rt.cl Sat Aug 21 03:42:49 2010 +0100 1.3 @@ -5,6 +5,7 @@ 1.4 int xsz, ysz; 1.5 int num_faces, num_lights; 1.6 int max_iter; 1.7 + int kd_depth; 1.8 }; 1.9 1.10 struct Vertex { 1.11 @@ -50,6 +51,18 @@ 1.12 global const struct Light *lights; 1.13 int num_lights; 1.14 global const struct Material *matlib; 1.15 + global const struct KDNode *kdtree; 1.16 +}; 1.17 + 1.18 +struct AABBox { 1.19 + float4 min, max; 1.20 +}; 1.21 + 1.22 +struct KDNode { 1.23 + AABBox aabb; 1.24 + int face_idx[32]; 1.25 + int num_faces; 1.26 + int padding[3]; 1.27 }; 1.28 1.29 #define MIN_ENERGY 0.001 1.30 @@ -58,6 +71,7 @@ 1.31 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp); 1.32 bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp); 1.33 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp); 1.34 +bool intersect_aabb(struct Ray ray, struct AABBox aabb); 1.35 1.36 float4 reflect(float4 v, float4 n); 1.37 float4 transform(float4 v, global const float *xform); 1.38 @@ -72,7 +86,8 @@ 1.39 global const struct Light *lights, 1.40 global const struct Ray *primrays, 1.41 global const float *xform, 1.42 - global const float *invtrans) 1.43 + global const float *invtrans, 1.44 + global const struct KDNode *kdtree) 1.45 { 1.46 int idx = get_global_id(0); 1.47 1.48 @@ -146,8 +161,12 @@ 1.49 return dcol + scol; 1.50 } 1.51 1.52 +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) 1.53 +{ 1.54 + return false; 1.55 +} 1.56 1.57 -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) 1.58 +/*bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) 1.59 { 1.60 struct SurfPoint sp, sp0; 1.61 sp0.t = 1.0; 1.62 @@ -168,7 +187,7 @@ 1.63 spres->mat = scn->matlib[sp0.obj->matid]; 1.64 } 1.65 return true; 1.66 -} 1.67 +}*/ 1.68 1.69 bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp) 1.70 { 1.71 @@ -210,6 +229,44 @@ 1.72 return true; 1.73 } 1.74 1.75 +bool intersect_aabb(struct Ray ray, struct AABBox aabb) 1.76 +{ 1.77 + if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z && 1.78 + ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) { 1.79 + return true; 1.80 + } 1.81 + 1.82 + float4 bbox[2] = {aabb.min, aabb.max}; 1.83 + 1.84 + int xsign = (int)(ray.dir.x < 0.0); 1.85 + float invdirx = 1.0 / ray.dir.x; 1.86 + float tmin = (bbox[xsign].x - ray.origin.x) * invdirx; 1.87 + float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx; 1.88 + 1.89 + int ysign = (int)(ray.dir.y < 0.0); 1.90 + float invdiry = 1.0 / ray.dir.y; 1.91 + float tymin = (bbox[ysign].y - ray.origin.y) * invdiry; 1.92 + float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry; 1.93 + 1.94 + if(tmin > tymax || tymin > tmax) { 1.95 + return false; 1.96 + } 1.97 + 1.98 + if(tymin > tmin) tmin = tymin; 1.99 + if(tymax < tmax) tmax = tymax; 1.100 + 1.101 + int zsign = (int)(ray.dir.z < 0.0); 1.102 + float invdirz = 1.0 / ray.dir.z; 1.103 + float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz; 1.104 + float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz; 1.105 + 1.106 + if(tmin > tzmax || tzmin > tmax) { 1.107 + return false; 1.108 + } 1.109 + 1.110 + return tmin < t1 && tmax > t0; 1.111 +} 1.112 + 1.113 float4 reflect(float4 v, float4 n) 1.114 { 1.115 return 2.0f * dot(v, n) * n - v;
2.1 --- a/src/ocl.cc Tue Aug 17 20:35:00 2010 +0100 2.2 +++ b/src/ocl.cc Sat Aug 21 03:42:49 2010 +0100 2.3 @@ -74,7 +74,7 @@ 2.4 } 2.5 2.6 2.7 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf) 2.8 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf) 2.9 { 2.10 int err; 2.11 cl_mem mem; 2.12 @@ -85,7 +85,7 @@ 2.13 } 2.14 2.15 2.16 - if(!(mem = clCreateBuffer(ctx, flags, sz, buf, &err))) { 2.17 + if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) { 2.18 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err)); 2.19 return 0; 2.20 } 2.21 @@ -131,7 +131,7 @@ 2.22 mbuf->ptr = 0; 2.23 } 2.24 2.25 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src) 2.26 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) 2.27 { 2.28 if(!mbuf) return false; 2.29 2.30 @@ -243,7 +243,7 @@ 2.31 return true; 2.32 } 2.33 2.34 -bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr) 2.35 +bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr) 2.36 { 2.37 printf("create argument %d buffer: %d bytes\n", idx, (int)sz); 2.38 CLMemBuffer *buf;
3.1 --- a/src/ocl.h Tue Aug 17 20:35:00 2010 +0100 3.2 +++ b/src/ocl.h Sat Aug 21 03:42:49 2010 +0100 3.3 @@ -27,13 +27,13 @@ 3.4 void *ptr; 3.5 }; 3.6 3.7 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf = 0); 3.8 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf = 0); 3.9 void destroy_mem_buffer(CLMemBuffer *mbuf); 3.10 3.11 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); 3.12 void unmap_mem_buffer(CLMemBuffer *mbuf); 3.13 3.14 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src); 3.15 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src); 3.16 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); 3.17 3.18 enum { 3.19 @@ -74,7 +74,7 @@ 3.20 3.21 bool set_argi(int arg, int val); 3.22 bool set_argf(int arg, float val); 3.23 - bool set_arg_buffer(int arg, int rdwr, size_t sz, void *buf = 0); 3.24 + bool set_arg_buffer(int arg, int rdwr, size_t sz, const void *buf = 0); 3.25 CLMemBuffer *get_arg_buffer(int arg); 3.26 int get_num_args() const; 3.27
4.1 --- a/src/rt.cc Tue Aug 17 20:35:00 2010 +0100 4.2 +++ b/src/rt.cc Sat Aug 21 03:42:49 2010 +0100 4.3 @@ -16,6 +16,7 @@ 4.4 KARG_PRIM_RAYS, 4.5 KARG_XFORM, 4.6 KARG_INVTRANS_XFORM, 4.7 + KARG_KDTREE, 4.8 4.9 NUM_KERNEL_ARGS 4.10 }; 4.11 @@ -25,6 +26,7 @@ 4.12 int xsz, ysz; 4.13 int num_faces, num_lights; 4.14 int max_iter; 4.15 + int kd_depth; 4.16 }; 4.17 4.18 struct Ray { 4.19 @@ -61,6 +63,7 @@ 4.20 rinf.num_faces = scn->get_num_faces(); 4.21 rinf.num_lights = sizeof lightlist / sizeof *lightlist; 4.22 rinf.max_iter = 6; 4.23 + rinf.kd_depth = kdtree_depth(scn->kdtree); 4.24 4.25 /* calculate primary rays */ 4.26 prim_rays = new Ray[xsz * ysz]; 4.27 @@ -82,6 +85,13 @@ 4.28 return false; 4.29 } 4.30 4.31 + const KDNodeGPU *kdbuf = scn->get_kdtree_buffer(); 4.32 + if(!kdbuf) { 4.33 + fprintf(stderr, "failed to create kdtree buffer\n"); 4.34 + return false; 4.35 + } 4.36 + int num_kdnodes = scn->get_num_kdnodes(); 4.37 + 4.38 /* setup argument buffers */ 4.39 prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float)); 4.40 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); 4.41 @@ -91,6 +101,7 @@ 4.42 prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays); 4.43 prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float)); 4.44 prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float)); 4.45 + prog->set_arg_buffer(KARG_KDTREE, ARG_RD, num_kdnodes * sizeof *kdbuf, kdbuf); 4.46 4.47 if(prog->get_num_args() < NUM_KERNEL_ARGS) { 4.48 return false;
5.1 --- a/src/scene.cc Tue Aug 17 20:35:00 2010 +0100 5.2 +++ b/src/scene.cc Sat Aug 21 03:42:49 2010 +0100 5.3 @@ -9,12 +9,12 @@ 5.4 static bool build_kdtree(KDNode *kd, int level = 0); 5.5 static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea = 1.0); 5.6 static void free_kdtree(KDNode *node); 5.7 -static int kdtree_depth(const KDNode *node); 5.8 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf); 5.9 static void print_item_counts(const KDNode *node, int level); 5.10 5.11 5.12 static int accel_param[NUM_ACCEL_PARAMS] = { 5.13 - 75, // max tree depth 5.14 + 40, // max tree depth 5.15 0, // max items per node (0 means ignore limit) 5.16 5, // estimated traversal cost 5.17 15 // estimated interseciton cost 5.18 @@ -70,11 +70,15 @@ 5.19 facebuf = 0; 5.20 num_faces = -1; 5.21 kdtree = 0; 5.22 + num_kdnodes = -1; 5.23 + kdbuf = 0; 5.24 } 5.25 5.26 Scene::~Scene() 5.27 { 5.28 delete [] facebuf; 5.29 + delete [] kdbuf; 5.30 + free_kdtree(kdtree); 5.31 } 5.32 5.33 bool Scene::add_mesh(Mesh *m) 5.34 @@ -158,6 +162,35 @@ 5.35 return facebuf; 5.36 } 5.37 5.38 +const KDNodeGPU *Scene::get_kdtree_buffer() const 5.39 +{ 5.40 + if(kdbuf) { 5.41 + return kdbuf; 5.42 + } 5.43 + 5.44 + if(!kdtree) { 5.45 + ((Scene*)this)->build_kdtree(); 5.46 + } 5.47 + 5.48 + if(!get_num_kdnodes()) { 5.49 + return 0; 5.50 + } 5.51 + 5.52 + kdbuf = new KDNodeGPU[num_kdnodes + 1]; 5.53 + kdtree_gpu_flatten(kdbuf, 1, kdtree, get_face_buffer()); 5.54 + return kdbuf; 5.55 +} 5.56 + 5.57 +int Scene::get_num_kdnodes() const 5.58 +{ 5.59 + if(num_kdnodes >= 0) { 5.60 + return num_kdnodes; 5.61 + } 5.62 + 5.63 + num_kdnodes = kdtree_nodes(kdtree); 5.64 + return num_kdnodes; 5.65 +} 5.66 + 5.67 5.68 void Scene::draw_kdtree() const 5.69 { 5.70 @@ -175,22 +208,10 @@ 5.71 5.72 static float palette[][3] = { 5.73 {0, 1, 0}, 5.74 - {0, 1, 0}, 5.75 - {0, 1, 0}, 5.76 - {1, 0, 0}, 5.77 - {1, 0, 0}, 5.78 {1, 0, 0}, 5.79 {0, 0, 1}, 5.80 - {0, 0, 1}, 5.81 - {0, 0, 1}, 5.82 - {1, 1, 0}, 5.83 - {1, 1, 0}, 5.84 {1, 1, 0}, 5.85 {0, 0, 1}, 5.86 - {0, 0, 1}, 5.87 - {0, 0, 1}, 5.88 - {1, 0, 1}, 5.89 - {1, 0, 1}, 5.90 {1, 0, 1} 5.91 }; 5.92 static int pal_size = sizeof palette / sizeof *palette; 5.93 @@ -291,10 +312,11 @@ 5.94 5.95 static bool build_kdtree(KDNode *kd, int level) 5.96 { 5.97 + int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH]; 5.98 int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS]; 5.99 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE]; 5.100 5.101 - if(kd->num_faces == 0) { 5.102 + if(kd->num_faces == 0 || level >= opt_max_depth) { 5.103 return true; 5.104 } 5.105 5.106 @@ -410,7 +432,7 @@ 5.107 } 5.108 } 5.109 5.110 -static int kdtree_depth(const KDNode *node) 5.111 +int kdtree_depth(const KDNode *node) 5.112 { 5.113 if(!node) return 0; 5.114 5.115 @@ -419,6 +441,36 @@ 5.116 return (left > right ? left : right) + 1; 5.117 } 5.118 5.119 +int kdtree_nodes(const KDNode *node) 5.120 +{ 5.121 + if(!node) return 0; 5.122 + return kdtree_nodes(node->left) + kdtree_nodes(node->right) + 1; 5.123 +} 5.124 + 5.125 +#define MAX_FACES (sizeof dest->face_idx / sizeof *dest->face_idx) 5.126 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf) 5.127 +{ 5.128 + KDNodeGPU *dest = kdbuf + idx; 5.129 + 5.130 + dest->aabb = node->aabb; 5.131 + dest->num_faces = 0; 5.132 + 5.133 + std::list<const Face*>::const_iterator it = node->faces.begin(); 5.134 + while(it != node->faces.end()) { 5.135 + if(dest->num_faces >= (int)MAX_FACES) { 5.136 + fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES); 5.137 + break; 5.138 + } 5.139 + dest->face_idx[dest->num_faces++] = *it - facebuf; 5.140 + } 5.141 + 5.142 + if(node->left) { 5.143 + assert(node->right); 5.144 + kdtree_gpu_flatten(kdbuf, idx * 2, node->left, facebuf); 5.145 + kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right, facebuf); 5.146 + } 5.147 +} 5.148 + 5.149 static void print_item_counts(const KDNode *node, int level) 5.150 { 5.151 if(!node) return;
6.1 --- a/src/scene.h Tue Aug 17 20:35:00 2010 +0100 6.2 +++ b/src/scene.h Sat Aug 21 03:42:49 2010 +0100 6.3 @@ -46,12 +46,6 @@ 6.4 KDAXIS_Z 6.5 }; 6.6 6.7 -#define KDCLEAR(node) ((node)->axis = -1) 6.8 -#define KDUSED(node) ((node)->axis >= 0) 6.9 -#define KDPARENT(x) ((x) >> 1) 6.10 -#define KDLEFT(x) ((x) << 1) 6.11 -#define KDRIGHT(x) (((x) << 1) + 1) 6.12 - 6.13 struct KDNode { 6.14 int axis; 6.15 float pt; 6.16 @@ -66,8 +60,10 @@ 6.17 }; 6.18 6.19 struct KDNodeGPU { 6.20 - int axis; 6.21 - float pt; 6.22 + AABBox aabb; 6.23 + int face_idx[32]; 6.24 + int num_faces; 6.25 + int padding[3]; 6.26 }; 6.27 6.28 6.29 @@ -76,12 +72,13 @@ 6.30 mutable Face *facebuf; 6.31 mutable int num_faces; 6.32 6.33 + mutable KDNodeGPU *kdbuf; 6.34 + mutable int num_kdnodes; 6.35 + 6.36 public: 6.37 std::vector<Mesh*> meshes; 6.38 std::vector<Material> matlib; 6.39 - 6.40 KDNode *kdtree; 6.41 - std::vector<KDNode> kdtree_gpu; 6.42 6.43 Scene(); 6.44 ~Scene(); 6.45 @@ -98,6 +95,8 @@ 6.46 bool load(FILE *fp); 6.47 6.48 const Face *get_face_buffer() const; 6.49 + const KDNodeGPU *get_kdtree_buffer() const; 6.50 + int get_num_kdnodes() const; 6.51 6.52 void draw_kdtree() const; 6.53 bool build_kdtree(); 6.54 @@ -114,4 +113,7 @@ 6.55 6.56 void set_accel_param(int p, int v); 6.57 6.58 +int kdtree_depth(const KDNode *tree); 6.59 +int kdtree_nodes(const KDNode *tree); 6.60 + 6.61 #endif /* MESH_H_ */