clray

annotate src/scene.cc @ 32:4cf4919c3812

performance sucks
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 24 Aug 2010 05:43:57 +0100
parents 92786fc3317e
children a218551293ad 7d77ded5f890
rev   line source
John@15 1 #include <math.h>
nuclear@25 2 #include <float.h>
nuclear@26 3 #include <assert.h>
nuclear@22 4 #include "scene.h"
nuclear@27 5 #include "ogl.h"
nuclear@6 6
nuclear@26 7
nuclear@27 8 static void draw_kdtree(const KDNode *node, int level = 0);
nuclear@32 9 static bool build_kdtree(KDNode *kd, const Face *faces, int level = 0);
nuclear@32 10 static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis);
nuclear@26 11 static void free_kdtree(KDNode *node);
nuclear@32 12 static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node);
nuclear@27 13 static void print_item_counts(const KDNode *node, int level);
nuclear@26 14
nuclear@26 15
nuclear@26 16 static int accel_param[NUM_ACCEL_PARAMS] = {
nuclear@28 17 40, // max tree depth
nuclear@26 18 0, // max items per node (0 means ignore limit)
nuclear@26 19 5, // estimated traversal cost
nuclear@26 20 15 // estimated interseciton cost
nuclear@26 21 };
nuclear@26 22
nuclear@26 23
nuclear@26 24 void set_accel_param(int p, int v)
nuclear@26 25 {
nuclear@26 26 assert(p >= 0 && p < NUM_ACCEL_PARAMS);
nuclear@26 27 accel_param[p] = v;
nuclear@26 28 }
nuclear@26 29
nuclear@26 30
John@15 31 #define FEQ(a, b) (fabs((a) - (b)) < 1e-8)
John@15 32 bool Face::operator ==(const Face &f) const
John@15 33 {
John@15 34 for(int i=0; i<3; i++) {
John@15 35 for(int j=0; j<3; j++) {
John@15 36 if(!FEQ(v[i].pos[j], f.v[i].pos[j])) {
John@15 37 return false;
John@15 38 }
John@15 39 if(!FEQ(v[i].normal[j], f.v[i].normal[j])) {
John@15 40 return false;
John@15 41 }
John@15 42 }
John@15 43 if(!FEQ(normal[i], f.normal[i])) {
John@15 44 return false;
John@15 45 }
John@15 46 }
John@15 47 return true;
John@15 48 }
John@15 49
nuclear@25 50 float AABBox::calc_surface_area() const
nuclear@25 51 {
nuclear@25 52 float area1 = (max[0] - min[0]) * (max[1] - min[1]);
nuclear@25 53 float area2 = (max[3] - min[3]) * (max[1] - min[1]);
nuclear@25 54 float area3 = (max[0] - min[0]) * (max[3] - min[3]);
nuclear@25 55
nuclear@25 56 return 2.0f * (area1 + area2 + area3);
nuclear@25 57 }
nuclear@25 58
nuclear@26 59 KDNode::KDNode()
nuclear@26 60 {
nuclear@26 61 left = right = 0;
nuclear@32 62 cost = 0.0;
nuclear@26 63 }
nuclear@26 64
nuclear@25 65
nuclear@24 66 Scene::Scene()
nuclear@24 67 {
nuclear@24 68 facebuf = 0;
nuclear@24 69 num_faces = -1;
nuclear@24 70 kdtree = 0;
nuclear@28 71 kdbuf = 0;
nuclear@24 72 }
nuclear@24 73
nuclear@24 74 Scene::~Scene()
nuclear@24 75 {
nuclear@24 76 delete [] facebuf;
nuclear@28 77 delete [] kdbuf;
nuclear@28 78 free_kdtree(kdtree);
nuclear@24 79 }
nuclear@24 80
nuclear@13 81 bool Scene::add_mesh(Mesh *m)
nuclear@13 82 {
nuclear@13 83 // make sure triangles have material ids
nuclear@13 84 for(size_t i=0; i<m->faces.size(); i++) {
nuclear@13 85 m->faces[i].matid = m->matid;
nuclear@13 86 }
nuclear@24 87
nuclear@24 88 try {
nuclear@24 89 meshes.push_back(m);
nuclear@24 90 }
nuclear@24 91 catch(...) {
nuclear@24 92 return false;
nuclear@24 93 }
nuclear@24 94
nuclear@24 95 // invalidate facebuffer and count
nuclear@24 96 delete [] facebuf;
nuclear@24 97 facebuf = 0;
nuclear@24 98 num_faces = -1;
nuclear@24 99
nuclear@13 100 return true;
nuclear@13 101 }
nuclear@13 102
John@14 103 int Scene::get_num_meshes() const
John@14 104 {
John@14 105 return (int)meshes.size();
John@14 106 }
John@14 107
nuclear@13 108 int Scene::get_num_faces() const
nuclear@13 109 {
nuclear@24 110 if(num_faces >= 0) {
nuclear@24 111 return num_faces;
nuclear@24 112 }
nuclear@24 113
nuclear@24 114 num_faces = 0;
nuclear@13 115 for(size_t i=0; i<meshes.size(); i++) {
nuclear@13 116 num_faces += meshes[i]->faces.size();
nuclear@13 117 }
nuclear@13 118 return num_faces;
nuclear@13 119 }
nuclear@13 120
John@14 121 int Scene::get_num_materials() const
John@14 122 {
John@14 123 return (int)matlib.size();
John@14 124 }
John@14 125
John@14 126 Material *Scene::get_materials()
John@14 127 {
John@14 128 if(matlib.empty()) {
John@14 129 return 0;
John@14 130 }
John@14 131 return &matlib[0];
John@14 132 }
John@14 133
John@14 134 const Material *Scene::get_materials() const
John@14 135 {
John@14 136 if(matlib.empty()) {
John@14 137 return 0;
John@14 138 }
John@14 139 return &matlib[0];
John@14 140 }
nuclear@24 141
nuclear@24 142 const Face *Scene::get_face_buffer() const
nuclear@24 143 {
nuclear@24 144 if(facebuf) {
nuclear@24 145 return facebuf;
nuclear@24 146 }
nuclear@24 147
nuclear@24 148 int num_meshes = get_num_meshes();
nuclear@24 149
nuclear@24 150 printf("constructing face buffer with %d faces (out of %d meshes)\n", get_num_faces(), num_meshes);
nuclear@24 151 facebuf = new Face[num_faces];
nuclear@24 152 Face *fptr = facebuf;
nuclear@24 153
nuclear@24 154 for(int i=0; i<num_meshes; i++) {
nuclear@24 155 for(size_t j=0; j<meshes[i]->faces.size(); j++) {
nuclear@24 156 *fptr++ = meshes[i]->faces[j];
nuclear@24 157 }
nuclear@24 158 }
nuclear@24 159 return facebuf;
nuclear@24 160 }
nuclear@24 161
nuclear@28 162 const KDNodeGPU *Scene::get_kdtree_buffer() const
nuclear@28 163 {
nuclear@28 164 if(kdbuf) {
nuclear@28 165 return kdbuf;
nuclear@28 166 }
nuclear@28 167
nuclear@28 168 if(!kdtree) {
nuclear@28 169 ((Scene*)this)->build_kdtree();
nuclear@28 170 }
nuclear@28 171
nuclear@29 172 int max_nodes = (int)pow(2, kdtree_depth(kdtree)) - 1;
nuclear@29 173 printf("allocating storage for the complete tree (%d)\n", max_nodes);
nuclear@28 174
nuclear@29 175 kdbuf = new KDNodeGPU[max_nodes + 1];
nuclear@32 176 kdtree_gpu_flatten(kdbuf, 1, kdtree);
nuclear@28 177 return kdbuf;
nuclear@28 178 }
nuclear@28 179
nuclear@29 180 static int ipow(int x, int n)
nuclear@28 181 {
nuclear@29 182 assert(n >= 0);
nuclear@29 183
nuclear@29 184 int res = 1;
nuclear@29 185 for(int i=0; i<n; i++) {
nuclear@29 186 res *= x;
nuclear@28 187 }
nuclear@29 188 return res;
nuclear@28 189 }
nuclear@28 190
nuclear@29 191 int Scene::get_kdtree_buffer_size() const
nuclear@29 192 {
nuclear@29 193 // 2**depth - 1 nodes for the complete tree + 1 for the unused heap item 0.
nuclear@29 194 return ipow(2, kdtree_depth(kdtree)) * sizeof(KDNodeGPU);
nuclear@29 195 }
nuclear@24 196
nuclear@27 197 void Scene::draw_kdtree() const
nuclear@27 198 {
nuclear@27 199 glPushAttrib(GL_ENABLE_BIT);
nuclear@27 200 glDisable(GL_LIGHTING);
nuclear@27 201 glDepthMask(0);
nuclear@27 202
nuclear@27 203 glBegin(GL_LINES);
nuclear@27 204 ::draw_kdtree(kdtree, 0);
nuclear@27 205 glEnd();
nuclear@27 206
nuclear@27 207 glDepthMask(1);
nuclear@27 208 glPopAttrib();
nuclear@27 209 }
nuclear@27 210
nuclear@27 211 static float palette[][3] = {
nuclear@27 212 {0, 1, 0},
nuclear@27 213 {1, 0, 0},
nuclear@27 214 {0, 0, 1},
nuclear@27 215 {1, 1, 0},
nuclear@27 216 {0, 0, 1},
nuclear@27 217 {1, 0, 1}
nuclear@27 218 };
nuclear@27 219 static int pal_size = sizeof palette / sizeof *palette;
nuclear@27 220
nuclear@27 221 static void draw_kdtree(const KDNode *node, int level)
nuclear@27 222 {
nuclear@27 223 if(!node) return;
nuclear@27 224
nuclear@27 225 draw_kdtree(node->left, level + 1);
nuclear@27 226 draw_kdtree(node->right, level + 1);
nuclear@27 227
nuclear@27 228 glColor3fv(palette[level % pal_size]);
nuclear@27 229
nuclear@27 230 glVertex3fv(node->aabb.min);
nuclear@27 231 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
nuclear@27 232 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
nuclear@27 233 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 234 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 235 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 236 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 237 glVertex3fv(node->aabb.min);
nuclear@27 238
nuclear@27 239 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 240 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 241 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 242 glVertex3fv(node->aabb.max);
nuclear@27 243 glVertex3fv(node->aabb.max);
nuclear@27 244 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
nuclear@27 245 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
nuclear@27 246 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 247
nuclear@27 248 glVertex3fv(node->aabb.min);
nuclear@27 249 glVertex3f(node->aabb.min[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 250 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.min[2]);
nuclear@27 251 glVertex3f(node->aabb.max[0], node->aabb.min[1], node->aabb.max[2]);
nuclear@27 252 glVertex3f(node->aabb.max[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 253 glVertex3fv(node->aabb.max);
nuclear@27 254 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.min[2]);
nuclear@27 255 glVertex3f(node->aabb.min[0], node->aabb.max[1], node->aabb.max[2]);
nuclear@27 256 }
nuclear@27 257
nuclear@27 258 bool Scene::build_kdtree()
nuclear@24 259 {
nuclear@29 260 assert(kdtree == 0);
nuclear@29 261
nuclear@24 262 const Face *faces = get_face_buffer();
nuclear@24 263 int num_faces = get_num_faces();
nuclear@24 264
nuclear@25 265 printf("Constructing kd-tree out of %d faces ...\n", num_faces);
nuclear@25 266
nuclear@27 267 int icost = accel_param[ACCEL_PARAM_COST_INTERSECT];
nuclear@27 268 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
nuclear@27 269 printf(" max items per leaf: %d\n", accel_param[ACCEL_PARAM_MAX_NODE_ITEMS]);
nuclear@27 270 printf(" SAH parameters - tcost: %d - icost: %d\n", tcost, icost);
nuclear@27 271
nuclear@25 272 free_kdtree(kdtree);
nuclear@25 273 kdtree = new KDNode;
nuclear@25 274
nuclear@25 275 /* Start the construction of the kdtree by adding all faces of the scene
nuclear@25 276 * to the new root node. At the same time calculate the root's AABB.
nuclear@25 277 */
nuclear@25 278 kdtree->aabb.min[0] = kdtree->aabb.min[1] = kdtree->aabb.min[2] = FLT_MAX;
nuclear@25 279 kdtree->aabb.max[0] = kdtree->aabb.max[1] = kdtree->aabb.max[2] = -FLT_MAX;
nuclear@25 280
nuclear@24 281 for(int i=0; i<num_faces; i++) {
nuclear@25 282 const Face *face = faces + i;
nuclear@25 283
nuclear@25 284 // for each vertex of the face ...
nuclear@25 285 for(int j=0; j<3; j++) {
nuclear@25 286 const float *pos = face->v[j].pos;
nuclear@25 287
nuclear@25 288 // for each element (xyz) of the position vector ...
nuclear@25 289 for(int k=0; k<3; k++) {
nuclear@25 290 if(pos[k] < kdtree->aabb.min[k]) {
nuclear@25 291 kdtree->aabb.min[k] = pos[k];
nuclear@25 292 }
nuclear@25 293 if(pos[k] > kdtree->aabb.max[k]) {
nuclear@25 294 kdtree->aabb.max[k] = pos[k];
nuclear@25 295 }
nuclear@25 296 }
nuclear@25 297 }
nuclear@25 298
nuclear@32 299 kdtree->face_idx.push_back(i); // add the face
nuclear@24 300 }
nuclear@24 301
nuclear@26 302 // calculate the heuristic for the root
nuclear@32 303 kdtree->cost = eval_cost(faces, &kdtree->face_idx[0], kdtree->face_idx.size(), kdtree->aabb, 0);
nuclear@26 304
nuclear@25 305 // now proceed splitting the root recursively
nuclear@32 306 if(!::build_kdtree(kdtree, faces)) {
nuclear@27 307 fprintf(stderr, "failed to build kdtree\n");
nuclear@27 308 return false;
nuclear@27 309 }
nuclear@27 310
nuclear@27 311 printf(" tree depth: %d\n", kdtree_depth(kdtree));
nuclear@27 312 print_item_counts(kdtree, 0);
nuclear@27 313 return true;
nuclear@24 314 }
nuclear@24 315
nuclear@32 316 static bool build_kdtree(KDNode *kd, const Face *faces, int level)
nuclear@24 317 {
nuclear@28 318 int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH];
nuclear@26 319 int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS];
nuclear@27 320 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
nuclear@27 321
nuclear@32 322 if(kd->face_idx.empty() || level >= opt_max_depth) {
nuclear@27 323 return true;
nuclear@25 324 }
nuclear@25 325
nuclear@27 326 int axis = level % 3;
nuclear@26 327
nuclear@26 328 float best_cost[2], best_sum_cost = FLT_MAX;
nuclear@26 329 float best_split;
nuclear@26 330
nuclear@32 331 for(size_t i=0; i<kd->face_idx.size(); i++) {
nuclear@32 332 const Face *face = faces + kd->face_idx[i];
nuclear@26 333
nuclear@32 334 for(int j=0; j<3; j++) {
nuclear@26 335 AABBox aabb_left, aabb_right;
nuclear@32 336 const float *split = face->v[j].pos;
nuclear@26 337
nuclear@26 338 aabb_left = aabb_right = kd->aabb;
nuclear@26 339 aabb_left.max[axis] = split[axis];
nuclear@26 340 aabb_right.min[axis] = split[axis];
nuclear@26 341
nuclear@32 342 float left_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_left, axis);
nuclear@32 343 float right_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_right, axis);
nuclear@27 344 float sum_cost = left_cost + right_cost - tcost; // tcost is added twice
nuclear@26 345
nuclear@26 346 if(sum_cost < best_sum_cost) {
nuclear@26 347 best_cost[0] = left_cost;
nuclear@26 348 best_cost[1] = right_cost;
nuclear@26 349 best_sum_cost = sum_cost;
nuclear@26 350 best_split = split[axis];
nuclear@26 351 }
nuclear@26 352 }
nuclear@26 353 }
nuclear@26 354
nuclear@29 355 //printf("current cost: %f, best_cost: %f\n", kd->cost, best_sum_cost);
nuclear@32 356 if(best_sum_cost > kd->cost && (opt_max_items == 0 || (int)kd->face_idx.size() <= opt_max_items)) {
nuclear@27 357 return true; // stop splitting if it doesn't reduce the cost
nuclear@26 358 }
nuclear@26 359
nuclear@26 360 // create the two children
nuclear@26 361 KDNode *kdleft, *kdright;
nuclear@26 362 kdleft = new KDNode;
nuclear@26 363 kdright = new KDNode;
nuclear@26 364
nuclear@26 365 kdleft->aabb = kdright->aabb = kd->aabb;
nuclear@26 366
nuclear@26 367 kdleft->aabb.max[axis] = best_split;
nuclear@26 368 kdright->aabb.min[axis] = best_split;
nuclear@26 369
nuclear@26 370 kdleft->cost = best_cost[0];
nuclear@26 371 kdright->cost = best_cost[1];
nuclear@26 372
nuclear@32 373 for(size_t i=0; i<kd->face_idx.size(); i++) {
nuclear@32 374 int fidx = kd->face_idx[i];
nuclear@32 375 const Face *face = faces + fidx;
nuclear@26 376
nuclear@26 377 if(face->v[0].pos[axis] < best_split ||
nuclear@26 378 face->v[1].pos[axis] < best_split ||
nuclear@26 379 face->v[2].pos[axis] < best_split) {
nuclear@32 380 kdleft->face_idx.push_back(fidx);
nuclear@26 381 }
nuclear@26 382 if(face->v[0].pos[axis] >= best_split ||
nuclear@26 383 face->v[1].pos[axis] >= best_split ||
nuclear@26 384 face->v[2].pos[axis] >= best_split) {
nuclear@32 385 kdright->face_idx.push_back(fidx);
nuclear@26 386 }
nuclear@26 387 }
nuclear@32 388 kd->face_idx.clear(); // only leaves have faces
nuclear@26 389
nuclear@26 390 kd->left = kdleft;
nuclear@26 391 kd->right = kdright;
nuclear@27 392
nuclear@32 393 return build_kdtree(kd->left, faces, level + 1) && build_kdtree(kd->right, faces, level + 1);
nuclear@26 394 }
nuclear@26 395
nuclear@32 396 static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis)
nuclear@26 397 {
nuclear@26 398 int num_inside = 0;
nuclear@26 399 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
nuclear@26 400 int icost = accel_param[ACCEL_PARAM_COST_INTERSECT];
nuclear@26 401
nuclear@32 402 for(int i=0; i<num_faces; i++) {
nuclear@32 403 const Face *face = faces + face_idx[i];
nuclear@26 404
nuclear@32 405 for(int j=0; j<3; j++) {
nuclear@32 406 if(face->v[j].pos[axis] >= aabb.min[axis] && face->v[j].pos[axis] < aabb.max[axis]) {
nuclear@26 407 num_inside++;
nuclear@26 408 break;
nuclear@26 409 }
nuclear@26 410 }
nuclear@26 411 }
nuclear@26 412
nuclear@27 413 float sarea = aabb.calc_surface_area();
nuclear@27 414 if(sarea < 1e-8) {
nuclear@27 415 return FLT_MAX; // heavily penalize 0-area voxels
nuclear@27 416 }
nuclear@27 417
nuclear@32 418 return tcost + sarea * num_inside * icost;
nuclear@24 419 }
nuclear@25 420
nuclear@25 421 static void free_kdtree(KDNode *node)
nuclear@25 422 {
nuclear@25 423 if(node) {
nuclear@25 424 free_kdtree(node->left);
nuclear@25 425 free_kdtree(node->right);
nuclear@25 426 delete node;
nuclear@25 427 }
nuclear@25 428 }
nuclear@27 429
nuclear@28 430 int kdtree_depth(const KDNode *node)
nuclear@27 431 {
nuclear@27 432 if(!node) return 0;
nuclear@27 433
nuclear@27 434 int left = kdtree_depth(node->left);
nuclear@27 435 int right = kdtree_depth(node->right);
nuclear@27 436 return (left > right ? left : right) + 1;
nuclear@27 437 }
nuclear@27 438
nuclear@28 439 int kdtree_nodes(const KDNode *node)
nuclear@28 440 {
nuclear@28 441 if(!node) return 0;
nuclear@28 442 return kdtree_nodes(node->left) + kdtree_nodes(node->right) + 1;
nuclear@28 443 }
nuclear@28 444
nuclear@28 445 #define MAX_FACES (sizeof dest->face_idx / sizeof *dest->face_idx)
nuclear@32 446 static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node)
nuclear@28 447 {
nuclear@28 448 KDNodeGPU *dest = kdbuf + idx;
nuclear@28 449
nuclear@28 450 dest->aabb = node->aabb;
nuclear@28 451 dest->num_faces = 0;
nuclear@28 452
nuclear@32 453 for(size_t i=0; i<node->face_idx.size(); i++) {
nuclear@28 454 if(dest->num_faces >= (int)MAX_FACES) {
nuclear@28 455 fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES);
nuclear@28 456 break;
nuclear@28 457 }
nuclear@32 458 dest->face_idx[dest->num_faces++] = node->face_idx[i];
nuclear@28 459 }
nuclear@28 460
nuclear@28 461 if(node->left) {
nuclear@28 462 assert(node->right);
nuclear@31 463 assert(!dest->num_faces);
nuclear@31 464
nuclear@31 465 dest->num_faces = -1;
nuclear@31 466
nuclear@32 467 kdtree_gpu_flatten(kdbuf, idx * 2, node->left);
nuclear@32 468 kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right);
nuclear@28 469 }
nuclear@28 470 }
nuclear@28 471
nuclear@27 472 static void print_item_counts(const KDNode *node, int level)
nuclear@27 473 {
nuclear@27 474 if(!node) return;
nuclear@27 475
nuclear@30 476 for(int i=0; i<level; i++) {
nuclear@27 477 fputs(" ", stdout);
nuclear@27 478 }
nuclear@32 479 printf("- %d (cost: %f)\n", (int)node->face_idx.size(), node->cost);
nuclear@27 480
nuclear@27 481 print_item_counts(node->left, level + 1);
nuclear@27 482 print_item_counts(node->right, level + 1);
nuclear@27 483 }