clray
diff src/scene.cc @ 32:4cf4919c3812
performance sucks
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Tue, 24 Aug 2010 05:43:57 +0100 |
parents | 92786fc3317e |
children | a218551293ad 7d77ded5f890 |
line diff
1.1 --- a/src/scene.cc Sun Aug 22 00:50:47 2010 +0100 1.2 +++ b/src/scene.cc Tue Aug 24 05:43:57 2010 +0100 1.3 @@ -6,10 +6,10 @@ 1.4 1.5 1.6 static void draw_kdtree(const KDNode *node, int level = 0); 1.7 -static bool build_kdtree(KDNode *kd, int level = 0); 1.8 -static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea = 1.0); 1.9 +static bool build_kdtree(KDNode *kd, const Face *faces, int level = 0); 1.10 +static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis); 1.11 static void free_kdtree(KDNode *node); 1.12 -static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf); 1.13 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node); 1.14 static void print_item_counts(const KDNode *node, int level); 1.15 1.16 1.17 @@ -58,10 +58,8 @@ 1.18 1.19 KDNode::KDNode() 1.20 { 1.21 - axis = 0; 1.22 - pt = 0.0; 1.23 left = right = 0; 1.24 - num_faces = 0; 1.25 + cost = 0.0; 1.26 } 1.27 1.28 1.29 @@ -175,7 +173,7 @@ 1.30 printf("allocating storage for the complete tree (%d)\n", max_nodes); 1.31 1.32 kdbuf = new KDNodeGPU[max_nodes + 1]; 1.33 - kdtree_gpu_flatten(kdbuf, 1, kdtree, get_face_buffer()); 1.34 + kdtree_gpu_flatten(kdbuf, 1, kdtree); 1.35 return kdbuf; 1.36 } 1.37 1.38 @@ -298,15 +296,14 @@ 1.39 } 1.40 } 1.41 1.42 - kdtree->faces.push_back(face); // add the face 1.43 - kdtree->num_faces++; 1.44 + kdtree->face_idx.push_back(i); // add the face 1.45 } 1.46 1.47 // calculate the heuristic for the root 1.48 - kdtree->cost = eval_cost(kdtree->faces, kdtree->aabb, kdtree->axis); 1.49 + kdtree->cost = eval_cost(faces, &kdtree->face_idx[0], kdtree->face_idx.size(), kdtree->aabb, 0); 1.50 1.51 // now proceed splitting the root recursively 1.52 - if(!::build_kdtree(kdtree)) { 1.53 + if(!::build_kdtree(kdtree, faces)) { 1.54 fprintf(stderr, "failed to build kdtree\n"); 1.55 return false; 1.56 } 1.57 @@ -316,36 +313,34 @@ 1.58 return true; 1.59 } 1.60 1.61 -static bool build_kdtree(KDNode *kd, int level) 1.62 +static bool build_kdtree(KDNode *kd, const Face *faces, int level) 1.63 { 1.64 int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH]; 1.65 int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS]; 1.66 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE]; 1.67 1.68 - if(kd->num_faces == 0 || level >= opt_max_depth) { 1.69 + if(kd->face_idx.empty() || level >= opt_max_depth) { 1.70 return true; 1.71 } 1.72 1.73 int axis = level % 3; 1.74 - //float parent_sa = kd->aabb.calc_surface_area(); 1.75 1.76 float best_cost[2], best_sum_cost = FLT_MAX; 1.77 float best_split; 1.78 1.79 - std::list<const Face*>::iterator it = kd->faces.begin(); 1.80 - while(it != kd->faces.end()) { 1.81 - const Face *face = *it++; 1.82 + for(size_t i=0; i<kd->face_idx.size(); i++) { 1.83 + const Face *face = faces + kd->face_idx[i]; 1.84 1.85 - for(int i=0; i<3; i++) { 1.86 + for(int j=0; j<3; j++) { 1.87 AABBox aabb_left, aabb_right; 1.88 - const float *split = face->v[i].pos; 1.89 + const float *split = face->v[j].pos; 1.90 1.91 aabb_left = aabb_right = kd->aabb; 1.92 aabb_left.max[axis] = split[axis]; 1.93 aabb_right.min[axis] = split[axis]; 1.94 1.95 - float left_cost = eval_cost(kd->faces, aabb_left, axis); 1.96 - float right_cost = eval_cost(kd->faces, aabb_right, axis); 1.97 + float left_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_left, axis); 1.98 + float right_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_right, axis); 1.99 float sum_cost = left_cost + right_cost - tcost; // tcost is added twice 1.100 1.101 if(sum_cost < best_sum_cost) { 1.102 @@ -358,10 +353,9 @@ 1.103 } 1.104 1.105 //printf("current cost: %f, best_cost: %f\n", kd->cost, best_sum_cost); 1.106 - if(best_sum_cost > kd->cost && (opt_max_items == 0 || kd->num_faces <= opt_max_items)) { 1.107 + if(best_sum_cost > kd->cost && (opt_max_items == 0 || (int)kd->face_idx.size() <= opt_max_items)) { 1.108 return true; // stop splitting if it doesn't reduce the cost 1.109 } 1.110 - kd->pt = best_split; 1.111 1.112 // create the two children 1.113 KDNode *kdleft, *kdright; 1.114 @@ -376,45 +370,40 @@ 1.115 kdleft->cost = best_cost[0]; 1.116 kdright->cost = best_cost[1]; 1.117 1.118 - //kdleft->axis = kdright->axis = (axis + 1) % 3; 1.119 - 1.120 - it = kd->faces.begin(); 1.121 - while(it != kd->faces.end()) { 1.122 - const Face *face = *it++; 1.123 + for(size_t i=0; i<kd->face_idx.size(); i++) { 1.124 + int fidx = kd->face_idx[i]; 1.125 + const Face *face = faces + fidx; 1.126 1.127 if(face->v[0].pos[axis] < best_split || 1.128 face->v[1].pos[axis] < best_split || 1.129 face->v[2].pos[axis] < best_split) { 1.130 - kdleft->faces.push_back(face); 1.131 - kdleft->num_faces++; 1.132 + kdleft->face_idx.push_back(fidx); 1.133 } 1.134 if(face->v[0].pos[axis] >= best_split || 1.135 face->v[1].pos[axis] >= best_split || 1.136 face->v[2].pos[axis] >= best_split) { 1.137 - kdright->faces.push_back(face); 1.138 - kdright->num_faces++; 1.139 + kdright->face_idx.push_back(fidx); 1.140 } 1.141 } 1.142 - kd->faces.clear(); // only leaves have faces 1.143 + kd->face_idx.clear(); // only leaves have faces 1.144 1.145 kd->left = kdleft; 1.146 kd->right = kdright; 1.147 1.148 - return build_kdtree(kd->left, level + 1) && build_kdtree(kd->right, level + 1); 1.149 + return build_kdtree(kd->left, faces, level + 1) && build_kdtree(kd->right, faces, level + 1); 1.150 } 1.151 1.152 -static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea) 1.153 +static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis) 1.154 { 1.155 int num_inside = 0; 1.156 int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE]; 1.157 int icost = accel_param[ACCEL_PARAM_COST_INTERSECT]; 1.158 1.159 - std::list<const Face*>::const_iterator it = faces.begin(); 1.160 - while(it != faces.end()) { 1.161 - const Face *face = *it++; 1.162 + for(int i=0; i<num_faces; i++) { 1.163 + const Face *face = faces + face_idx[i]; 1.164 1.165 - for(int i=0; i<3; i++) { 1.166 - if(face->v[i].pos[axis] >= aabb.min[axis] && face->v[i].pos[axis] < aabb.max[axis]) { 1.167 + for(int j=0; j<3; j++) { 1.168 + if(face->v[j].pos[axis] >= aabb.min[axis] && face->v[j].pos[axis] < aabb.max[axis]) { 1.169 num_inside++; 1.170 break; 1.171 } 1.172 @@ -426,7 +415,7 @@ 1.173 return FLT_MAX; // heavily penalize 0-area voxels 1.174 } 1.175 1.176 - return tcost + (sarea / par_sarea) * num_inside * icost; 1.177 + return tcost + sarea * num_inside * icost; 1.178 } 1.179 1.180 static void free_kdtree(KDNode *node) 1.181 @@ -454,20 +443,19 @@ 1.182 } 1.183 1.184 #define MAX_FACES (sizeof dest->face_idx / sizeof *dest->face_idx) 1.185 -static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf) 1.186 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node) 1.187 { 1.188 KDNodeGPU *dest = kdbuf + idx; 1.189 1.190 dest->aabb = node->aabb; 1.191 dest->num_faces = 0; 1.192 1.193 - std::list<const Face*>::const_iterator it = node->faces.begin(); 1.194 - while(it != node->faces.end()) { 1.195 + for(size_t i=0; i<node->face_idx.size(); i++) { 1.196 if(dest->num_faces >= (int)MAX_FACES) { 1.197 fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES); 1.198 break; 1.199 } 1.200 - dest->face_idx[dest->num_faces++] = *it++ - facebuf; 1.201 + dest->face_idx[dest->num_faces++] = node->face_idx[i]; 1.202 } 1.203 1.204 if(node->left) { 1.205 @@ -476,8 +464,8 @@ 1.206 1.207 dest->num_faces = -1; 1.208 1.209 - kdtree_gpu_flatten(kdbuf, idx * 2, node->left, facebuf); 1.210 - kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right, facebuf); 1.211 + kdtree_gpu_flatten(kdbuf, idx * 2, node->left); 1.212 + kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right); 1.213 } 1.214 } 1.215 1.216 @@ -488,7 +476,7 @@ 1.217 for(int i=0; i<level; i++) { 1.218 fputs(" ", stdout); 1.219 } 1.220 - printf("- %d (cost: %f)\n", node->num_faces, node->cost); 1.221 + printf("- %d (cost: %f)\n", (int)node->face_idx.size(), node->cost); 1.222 1.223 print_item_counts(node->left, level + 1); 1.224 print_item_counts(node->right, level + 1);