# HG changeset patch # User John Tsiombikas # Date 1282358569 -3600 # Node ID 97cfd9675310f549f66df8d5479976d0e01157be # Parent 8b2f2ad14ae771b7f6235b572ba0ef1415705032 trying to pass the kdtree to the kernel diff -r 8b2f2ad14ae7 -r 97cfd9675310 rt.cl --- a/rt.cl Tue Aug 17 20:35:00 2010 +0100 +++ b/rt.cl Sat Aug 21 03:42:49 2010 +0100 @@ -5,6 +5,7 @@ int xsz, ysz; int num_faces, num_lights; int max_iter; + int kd_depth; }; struct Vertex { @@ -50,6 +51,18 @@ global const struct Light *lights; int num_lights; global const struct Material *matlib; + global const struct KDNode *kdtree; +}; + +struct AABBox { + float4 min, max; +}; + +struct KDNode { + AABBox aabb; + int face_idx[32]; + int num_faces; + int padding[3]; }; #define MIN_ENERGY 0.001 @@ -58,6 +71,7 @@ float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp); bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp); bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp); +bool intersect_aabb(struct Ray ray, struct AABBox aabb); float4 reflect(float4 v, float4 n); float4 transform(float4 v, global const float *xform); @@ -72,7 +86,8 @@ global const struct Light *lights, global const struct Ray *primrays, global const float *xform, - global const float *invtrans) + global const float *invtrans, + global const struct KDNode *kdtree) { int idx = get_global_id(0); @@ -146,8 +161,12 @@ return dcol + scol; } +bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) +{ + return false; +} -bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) +/*bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres) { struct SurfPoint sp, sp0; sp0.t = 1.0; @@ -168,7 +187,7 @@ spres->mat = scn->matlib[sp0.obj->matid]; } return true; -} +}*/ bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp) { @@ -210,6 +229,44 @@ return true; } +bool intersect_aabb(struct Ray ray, struct AABBox aabb) +{ + if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z && + ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) { + return true; + } + + float4 bbox[2] = {aabb.min, aabb.max}; + + int xsign = (int)(ray.dir.x < 0.0); + float invdirx = 1.0 / ray.dir.x; + float tmin = (bbox[xsign].x - ray.origin.x) * invdirx; + float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx; + + int ysign = (int)(ray.dir.y < 0.0); + float invdiry = 1.0 / ray.dir.y; + float tymin = (bbox[ysign].y - ray.origin.y) * invdiry; + float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry; + + if(tmin > tymax || tymin > tmax) { + return false; + } + + if(tymin > tmin) tmin = tymin; + if(tymax < tmax) tmax = tymax; + + int zsign = (int)(ray.dir.z < 0.0); + float invdirz = 1.0 / ray.dir.z; + float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz; + float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz; + + if(tmin > tzmax || tzmin > tmax) { + return false; + } + + return tmin < t1 && tmax > t0; +} + float4 reflect(float4 v, float4 n) { return 2.0f * dot(v, n) * n - v; diff -r 8b2f2ad14ae7 -r 97cfd9675310 src/ocl.cc --- a/src/ocl.cc Tue Aug 17 20:35:00 2010 +0100 +++ b/src/ocl.cc Sat Aug 21 03:42:49 2010 +0100 @@ -74,7 +74,7 @@ } -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf) +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf) { int err; cl_mem mem; @@ -85,7 +85,7 @@ } - if(!(mem = clCreateBuffer(ctx, flags, sz, buf, &err))) { + if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) { fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err)); return 0; } @@ -131,7 +131,7 @@ mbuf->ptr = 0; } -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src) +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) { if(!mbuf) return false; @@ -243,7 +243,7 @@ return true; } -bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr) +bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr) { printf("create argument %d buffer: %d bytes\n", idx, (int)sz); CLMemBuffer *buf; diff -r 8b2f2ad14ae7 -r 97cfd9675310 src/ocl.h --- a/src/ocl.h Tue Aug 17 20:35:00 2010 +0100 +++ b/src/ocl.h Sat Aug 21 03:42:49 2010 +0100 @@ -27,13 +27,13 @@ void *ptr; }; -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf = 0); +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf = 0); void destroy_mem_buffer(CLMemBuffer *mbuf); void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); void unmap_mem_buffer(CLMemBuffer *mbuf); -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src); +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src); bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); enum { @@ -74,7 +74,7 @@ bool set_argi(int arg, int val); bool set_argf(int arg, float val); - bool set_arg_buffer(int arg, int rdwr, size_t sz, void *buf = 0); + bool set_arg_buffer(int arg, int rdwr, size_t sz, const void *buf = 0); CLMemBuffer *get_arg_buffer(int arg); int get_num_args() const; diff -r 8b2f2ad14ae7 -r 97cfd9675310 src/rt.cc --- a/src/rt.cc Tue Aug 17 20:35:00 2010 +0100 +++ b/src/rt.cc Sat Aug 21 03:42:49 2010 +0100 @@ -16,6 +16,7 @@ KARG_PRIM_RAYS, KARG_XFORM, KARG_INVTRANS_XFORM, + KARG_KDTREE, NUM_KERNEL_ARGS }; @@ -25,6 +26,7 @@ int xsz, ysz; int num_faces, num_lights; int max_iter; + int kd_depth; }; struct Ray { @@ -61,6 +63,7 @@ rinf.num_faces = scn->get_num_faces(); rinf.num_lights = sizeof lightlist / sizeof *lightlist; rinf.max_iter = 6; + rinf.kd_depth = kdtree_depth(scn->kdtree); /* calculate primary rays */ prim_rays = new Ray[xsz * ysz]; @@ -82,6 +85,13 @@ return false; } + const KDNodeGPU *kdbuf = scn->get_kdtree_buffer(); + if(!kdbuf) { + fprintf(stderr, "failed to create kdtree buffer\n"); + return false; + } + int num_kdnodes = scn->get_num_kdnodes(); + /* setup argument buffers */ prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float)); prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); @@ -91,6 +101,7 @@ prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays); prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float)); prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float)); + prog->set_arg_buffer(KARG_KDTREE, ARG_RD, num_kdnodes * sizeof *kdbuf, kdbuf); if(prog->get_num_args() < NUM_KERNEL_ARGS) { return false; diff -r 8b2f2ad14ae7 -r 97cfd9675310 src/scene.cc --- a/src/scene.cc Tue Aug 17 20:35:00 2010 +0100 +++ b/src/scene.cc Sat Aug 21 03:42:49 2010 +0100 @@ -9,12 +9,12 @@ static bool build_kdtree(KDNode *kd, int level = 0); static float eval_cost(const std::list &faces, const AABBox &aabb, int axis, float par_sarea = 1.0); static void free_kdtree(KDNode *node); -static int kdtree_depth(const KDNode *node); +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf); static void print_item_counts(const KDNode *node, int level); static int accel_param[NUM_ACCEL_PARAMS] = { - 75, // max tree depth + 40, // max tree depth 0, // max items per node (0 means ignore limit) 5, // estimated traversal cost 15 // estimated interseciton cost @@ -70,11 +70,15 @@ facebuf = 0; num_faces = -1; kdtree = 0; + num_kdnodes = -1; + kdbuf = 0; } Scene::~Scene() { delete [] facebuf; + delete [] kdbuf; + free_kdtree(kdtree); } bool Scene::add_mesh(Mesh *m) @@ -158,6 +162,35 @@ return facebuf; } +const KDNodeGPU *Scene::get_kdtree_buffer() const +{ + if(kdbuf) { + return kdbuf; + } + + if(!kdtree) { + ((Scene*)this)->build_kdtree(); + } + + if(!get_num_kdnodes()) { + return 0; + } + + kdbuf = new KDNodeGPU[num_kdnodes + 1]; + kdtree_gpu_flatten(kdbuf, 1, kdtree, get_face_buffer()); + return kdbuf; +} + +int Scene::get_num_kdnodes() const +{ + if(num_kdnodes >= 0) { + return num_kdnodes; + } + + num_kdnodes = kdtree_nodes(kdtree); + return num_kdnodes; +} + void Scene::draw_kdtree() const { @@ -175,22 +208,10 @@ static float palette[][3] = { {0, 1, 0}, - {0, 1, 0}, - {0, 1, 0}, - {1, 0, 0}, - {1, 0, 0}, {1, 0, 0}, {0, 0, 1}, - {0, 0, 1}, - {0, 0, 1}, - {1, 1, 0}, - {1, 1, 0}, {1, 1, 0}, {0, 0, 1}, - {0, 0, 1}, - {0, 0, 1}, - {1, 0, 1}, - {1, 0, 1}, {1, 0, 1} }; static int pal_size = sizeof palette / sizeof *palette; @@ -291,10 +312,11 @@ static bool build_kdtree(KDNode *kd, int level) { + int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH]; int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS]; int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE]; - if(kd->num_faces == 0) { + if(kd->num_faces == 0 || level >= opt_max_depth) { return true; } @@ -410,7 +432,7 @@ } } -static int kdtree_depth(const KDNode *node) +int kdtree_depth(const KDNode *node) { if(!node) return 0; @@ -419,6 +441,36 @@ return (left > right ? left : right) + 1; } +int kdtree_nodes(const KDNode *node) +{ + if(!node) return 0; + return kdtree_nodes(node->left) + kdtree_nodes(node->right) + 1; +} + +#define MAX_FACES (sizeof dest->face_idx / sizeof *dest->face_idx) +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf) +{ + KDNodeGPU *dest = kdbuf + idx; + + dest->aabb = node->aabb; + dest->num_faces = 0; + + std::list::const_iterator it = node->faces.begin(); + while(it != node->faces.end()) { + if(dest->num_faces >= (int)MAX_FACES) { + fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES); + break; + } + dest->face_idx[dest->num_faces++] = *it - facebuf; + } + + if(node->left) { + assert(node->right); + kdtree_gpu_flatten(kdbuf, idx * 2, node->left, facebuf); + kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right, facebuf); + } +} + static void print_item_counts(const KDNode *node, int level) { if(!node) return; diff -r 8b2f2ad14ae7 -r 97cfd9675310 src/scene.h --- a/src/scene.h Tue Aug 17 20:35:00 2010 +0100 +++ b/src/scene.h Sat Aug 21 03:42:49 2010 +0100 @@ -46,12 +46,6 @@ KDAXIS_Z }; -#define KDCLEAR(node) ((node)->axis = -1) -#define KDUSED(node) ((node)->axis >= 0) -#define KDPARENT(x) ((x) >> 1) -#define KDLEFT(x) ((x) << 1) -#define KDRIGHT(x) (((x) << 1) + 1) - struct KDNode { int axis; float pt; @@ -66,8 +60,10 @@ }; struct KDNodeGPU { - int axis; - float pt; + AABBox aabb; + int face_idx[32]; + int num_faces; + int padding[3]; }; @@ -76,12 +72,13 @@ mutable Face *facebuf; mutable int num_faces; + mutable KDNodeGPU *kdbuf; + mutable int num_kdnodes; + public: std::vector meshes; std::vector matlib; - KDNode *kdtree; - std::vector kdtree_gpu; Scene(); ~Scene(); @@ -98,6 +95,8 @@ bool load(FILE *fp); const Face *get_face_buffer() const; + const KDNodeGPU *get_kdtree_buffer() const; + int get_num_kdnodes() const; void draw_kdtree() const; bool build_kdtree(); @@ -114,4 +113,7 @@ void set_accel_param(int p, int v); +int kdtree_depth(const KDNode *tree); +int kdtree_nodes(const KDNode *tree); + #endif /* MESH_H_ */