clray

view src/scene.cc @ 32:4cf4919c3812

performance sucks
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 24 Aug 2010 05:43:57 +0100
parents 92786fc3317e
children a218551293ad 7d77ded5f890
line source
1 #include <math.h>
2 #include <float.h>
3 #include <assert.h>
4 #include "scene.h"
5 #include "ogl.h"
8 static void draw_kdtree(const KDNode *node, int level = 0);
9 static bool build_kdtree(KDNode *kd, const Face *faces, int level = 0);
10 static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis);
11 static void free_kdtree(KDNode *node);
12 static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node);
13 static void print_item_counts(const KDNode *node, int level);
16 static int accel_param[NUM_ACCEL_PARAMS] = {
17 40, // max tree depth
18 0, // max items per node (0 means ignore limit)
19 5, // estimated traversal cost
20 15 // estimated interseciton cost
21 };
24 void set_accel_param(int p, int v)
25 {
26 assert(p >= 0 && p < NUM_ACCEL_PARAMS);
27 accel_param[p] = v;
28 }
31 #define FEQ(a, b) (fabs((a) - (b)) < 1e-8)
32 bool Face::operator ==(const Face &f) const
33 {
34 for(int i=0; i<3; i++) {
35 for(int j=0; j<3; j++) {
36 if(!FEQ(v[i].pos[j], f.v[i].pos[j])) {
37 return false;
38 }
39 if(!FEQ(v[i].normal[j], f.v[i].normal[j])) {
40 return false;
41 }
42 }
43 if(!FEQ(normal[i], f.normal[i])) {
44 return false;
45 }
46 }
47 return true;
48 }
50 float AABBox::calc_surface_area() const
51 {
52 float area1 = (max[0] - min[0]) * (max[1] - min[1]);
53 float area2 = (max[3] - min[3]) * (max[1] - min[1]);
54 float area3 = (max[0] - min[0]) * (max[3] - min[3]);
56 return 2.0f * (area1 + area2 + area3);
57 }
59 KDNode::KDNode()
60 {
61 left = right = 0;
62 cost = 0.0;
63 }
66 Scene::Scene()
67 {
68 facebuf = 0;
69 num_faces = -1;
70 kdtree = 0;
71 kdbuf = 0;
72 }
74 Scene::~Scene()
75 {
76 delete [] facebuf;
77 delete [] kdbuf;
78 free_kdtree(kdtree);
79 }
81 bool Scene::add_mesh(Mesh *m)
82 {
83 // make sure triangles have material ids
84 for(size_t i=0; i<m->faces.size(); i++) {
85 m->faces[i].matid = m->matid;
86 }
88 try {
89 meshes.push_back(m);
90 }
91 catch(...) {
92 return false;
93 }
95 // invalidate facebuffer and count
96 delete [] facebuf;
97 facebuf = 0;
98 num_faces = -1;
100 return true;
101 }
103 int Scene::get_num_meshes() const
104 {
105 return (int)meshes.size();
106 }
108 int Scene::get_num_faces() const
109 {
110 if(num_faces >= 0) {
111 return num_faces;
112 }
114 num_faces = 0;
115 for(size_t i=0; i<meshes.size(); i++) {
116 num_faces += meshes[i]->faces.size();
117 }
118 return num_faces;
119 }
121 int Scene::get_num_materials() const
122 {
123 return (int)matlib.size();
124 }
126 Material *Scene::get_materials()
127 {
128 if(matlib.empty()) {
129 return 0;
130 }
131 return &matlib[0];
132 }
134 const Material *Scene::get_materials() const
135 {
136 if(matlib.empty()) {
137 return 0;
138 }
139 return &matlib[0];
140 }
142 const Face *Scene::get_face_buffer() const
143 {
144 if(facebuf) {
145 return facebuf;
146 }
148 int num_meshes = get_num_meshes();
150 printf("constructing face buffer with %d faces (out of %d meshes)\n", get_num_faces(), num_meshes);
151 facebuf = new Face[num_faces];
152 Face *fptr = facebuf;
154 for(int i=0; i<num_meshes; i++) {
155 for(size_t j=0; j<meshes[i]->faces.size(); j++) {
156 *fptr++ = meshes[i]->faces[j];
157 }
158 }
159 return facebuf;
160 }
162 const KDNodeGPU *Scene::get_kdtree_buffer() const
163 {
164 if(kdbuf) {
165 return kdbuf;
166 }
168 if(!kdtree) {
169 ((Scene*)this)->build_kdtree();
170 }
172 int max_nodes = (int)pow(2, kdtree_depth(kdtree)) - 1;
173 printf("allocating storage for the complete tree (%d)\n", max_nodes);
175 kdbuf = new KDNodeGPU[max_nodes + 1];
176 kdtree_gpu_flatten(kdbuf, 1, kdtree);
177 return kdbuf;
178 }
/* Integer power: returns x multiplied by itself n times (1 when n is 0).
 * n must not be negative (checked with assert only).
 */
static int ipow(int x, int n)
{
	assert(n >= 0);

	int result = 1;
	while(n-- > 0) {
		result *= x;
	}
	return result;
}
191 int Scene::get_kdtree_buffer_size() const
192 {
193 // 2**depth - 1 nodes for the complete tree + 1 for the unused heap item 0.
194 return ipow(2, kdtree_depth(kdtree)) * sizeof(KDNodeGPU);
195 }
197 void Scene::draw_kdtree() const
198 {
199 glPushAttrib(GL_ENABLE_BIT);
200 glDisable(GL_LIGHTING);
201 glDepthMask(0);
203 glBegin(GL_LINES);
204 ::draw_kdtree(kdtree, 0);
205 glEnd();
207 glDepthMask(1);
208 glPopAttrib();
209 }
/* RGB colors used by draw_kdtree(); the tree level selects the entry,
 * wrapping around after pal_size levels.
 */
static float palette[][3] = {
	{0.0f, 1.0f, 0.0f},	// green
	{1.0f, 0.0f, 0.0f},	// red
	{0.0f, 0.0f, 1.0f},	// blue
	{1.0f, 1.0f, 0.0f},	// yellow
	{0.0f, 0.0f, 1.0f},	// blue again (identical to entry 2)
	{1.0f, 0.0f, 1.0f}	// magenta
};
// number of colors in the palette
static int pal_size = sizeof(palette) / sizeof(palette[0]);
221 static void draw_kdtree(const KDNode *node, int level)
222 {
223 if(!node) return;
225 draw_kdtree(node->left, level + 1);
226 draw_kdtree(node->right, level + 1);
228 glColor3fv(palette[level % pal_size]);
230 glVertex3fv(node->aabb.min);
231 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
232 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
233 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
234 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
235 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
236 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
237 glVertex3fv(node->aabb.min);
239 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
240 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
241 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
242 glVertex3fv(node->aabb.max);
243 glVertex3fv(node->aabb.max);
244 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
245 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
246 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
248 glVertex3fv(node->aabb.min);
249 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
250 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
251 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
252 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
253 glVertex3fv(node->aabb.max);
254 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
255 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
256 }
258 bool Scene::build_kdtree()
259 {
260 assert(kdtree == 0);
262 const Face *faces = get_face_buffer();
263 int num_faces = get_num_faces();
265 printf("Constructing kd-tree out of %d faces ...\n", num_faces);
267 int icost = accel_param[ACCEL_PARAM_COST_INTERSECT];
268 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
269 printf(" max items per leaf: %d\n", accel_param[ACCEL_PARAM_MAX_NODE_ITEMS]);
270 printf(" SAH parameters - tcost: %d - icost: %d\n", tcost, icost);
272 free_kdtree(kdtree);
273 kdtree = new KDNode;
275 /* Start the construction of the kdtree by adding all faces of the scene
276 * to the new root node. At the same time calculate the root's AABB.
277 */
278 kdtree->aabb.min[0] = kdtree->aabb.min[1] = kdtree->aabb.min[2] = FLT_MAX;
279 kdtree->aabb.max[0] = kdtree->aabb.max[1] = kdtree->aabb.max[2] = -FLT_MAX;
281 for(int i=0; i<num_faces; i++) {
282 const Face *face = faces + i;
284 // for each vertex of the face ...
285 for(int j=0; j<3; j++) {
286 const float *pos = face->v[j].pos;
288 // for each element (xyz) of the position vector ...
289 for(int k=0; k<3; k++) {
290 if(pos[k] < kdtree->aabb.min[k]) {
291 kdtree->aabb.min[k] = pos[k];
292 }
293 if(pos[k] > kdtree->aabb.max[k]) {
294 kdtree->aabb.max[k] = pos[k];
295 }
296 }
297 }
299 kdtree->face_idx.push_back(i); // add the face
300 }
302 // calculate the heuristic for the root
303 kdtree->cost = eval_cost(faces, &kdtree->face_idx[0], kdtree->face_idx.size(), kdtree->aabb, 0);
305 // now proceed splitting the root recursively
306 if(!::build_kdtree(kdtree, faces)) {
307 fprintf(stderr, "failed to build kdtree\n");
308 return false;
309 }
311 printf(" tree depth: %d\n", kdtree_depth(kdtree));
312 print_item_counts(kdtree, 0);
313 return true;
314 }
316 static bool build_kdtree(KDNode *kd, const Face *faces, int level)
317 {
318 int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH];
319 int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS];
320 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
322 if(kd->face_idx.empty() || level >= opt_max_depth) {
323 return true;
324 }
326 int axis = level % 3;
328 float best_cost[2], best_sum_cost = FLT_MAX;
329 float best_split;
331 for(size_t i=0; i<kd->face_idx.size(); i++) {
332 const Face *face = faces + kd->face_idx[i];
334 for(int j=0; j<3; j++) {
335 AABBox aabb_left, aabb_right;
336 const float *split = face->v[j].pos;
338 aabb_left = aabb_right = kd->aabb;
339 aabb_left.max[axis] = split[axis];
340 aabb_right.min[axis] = split[axis];
342 float left_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_left, axis);
343 float right_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_right, axis);
344 float sum_cost = left_cost + right_cost - tcost; // tcost is added twice
346 if(sum_cost < best_sum_cost) {
347 best_cost[0] = left_cost;
348 best_cost[1] = right_cost;
349 best_sum_cost = sum_cost;
350 best_split = split[axis];
351 }
352 }
353 }
355 //printf("current cost: %f, best_cost: %f\n", kd->cost, best_sum_cost);
356 if(best_sum_cost > kd->cost && (opt_max_items == 0 || (int)kd->face_idx.size() <= opt_max_items)) {
357 return true; // stop splitting if it doesn't reduce the cost
358 }
360 // create the two children
361 KDNode *kdleft, *kdright;
362 kdleft = new KDNode;
363 kdright = new KDNode;
365 kdleft->aabb = kdright->aabb = kd->aabb;
367 kdleft->aabb.max[axis] = best_split;
368 kdright->aabb.min[axis] = best_split;
370 kdleft->cost = best_cost[0];
371 kdright->cost = best_cost[1];
373 for(size_t i=0; i<kd->face_idx.size(); i++) {
374 int fidx = kd->face_idx[i];
375 const Face *face = faces + fidx;
377 if(face->v[0].pos[axis] < best_split ||
378 face->v[1].pos[axis] < best_split ||
379 face->v[2].pos[axis] < best_split) {
380 kdleft->face_idx.push_back(fidx);
381 }
382 if(face->v[0].pos[axis] >= best_split ||
383 face->v[1].pos[axis] >= best_split ||
384 face->v[2].pos[axis] >= best_split) {
385 kdright->face_idx.push_back(fidx);
386 }
387 }
388 kd->face_idx.clear(); // only leaves have faces
390 kd->left = kdleft;
391 kd->right = kdright;
393 return build_kdtree(kd->left, faces, level + 1) && build_kdtree(kd->right, faces, level + 1);
394 }
396 static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis)
397 {
398 int num_inside = 0;
399 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
400 int icost = accel_param[ACCEL_PARAM_COST_INTERSECT];
402 for(int i=0; i<num_faces; i++) {
403 const Face *face = faces + face_idx[i];
405 for(int j=0; j<3; j++) {
406 if(face->v[j].pos[axis] >= aabb.min[axis] && face->v[j].pos[axis] < aabb.max[axis]) {
407 num_inside++;
408 break;
409 }
410 }
411 }
413 float sarea = aabb.calc_surface_area();
414 if(sarea < 1e-8) {
415 return FLT_MAX; // heavily penalize 0-area voxels
416 }
418 return tcost + sarea * num_inside * icost;
419 }
421 static void free_kdtree(KDNode *node)
422 {
423 if(node) {
424 free_kdtree(node->left);
425 free_kdtree(node->right);
426 delete node;
427 }
428 }
430 int kdtree_depth(const KDNode *node)
431 {
432 if(!node) return 0;
434 int left = kdtree_depth(node->left);
435 int right = kdtree_depth(node->right);
436 return (left > right ? left : right) + 1;
437 }
439 int kdtree_nodes(const KDNode *node)
440 {
441 if(!node) return 0;
442 return kdtree_nodes(node->left) + kdtree_nodes(node->right) + 1;
443 }
445 #define MAX_FACES (sizeof dest->face_idx / sizeof *dest->face_idx)
446 static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node)
447 {
448 KDNodeGPU *dest = kdbuf + idx;
450 dest->aabb = node->aabb;
451 dest->num_faces = 0;
453 for(size_t i=0; i<node->face_idx.size(); i++) {
454 if(dest->num_faces >= (int)MAX_FACES) {
455 fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES);
456 break;
457 }
458 dest->face_idx[dest->num_faces++] = node->face_idx[i];
459 }
461 if(node->left) {
462 assert(node->right);
463 assert(!dest->num_faces);
465 dest->num_faces = -1;
467 kdtree_gpu_flatten(kdbuf, idx * 2, node->left);
468 kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right);
469 }
470 }
472 static void print_item_counts(const KDNode *node, int level)
473 {
474 if(!node) return;
476 for(int i=0; i<level; i++) {
477 fputs(" ", stdout);
478 }
479 printf("- %d (cost: %f)\n", (int)node->face_idx.size(), node->cost);
481 print_item_counts(node->left, level + 1);
482 print_item_counts(node->right, level + 1);
483 }