clray

diff src/scene.cc @ 32:4cf4919c3812

performance sucks
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 24 Aug 2010 05:43:57 +0100
parents 92786fc3317e
children a218551293ad 7d77ded5f890
line diff
     1.1 --- a/src/scene.cc	Sun Aug 22 00:50:47 2010 +0100
     1.2 +++ b/src/scene.cc	Tue Aug 24 05:43:57 2010 +0100
     1.3 @@ -6,10 +6,10 @@
     1.4  
     1.5  
     1.6  static void draw_kdtree(const KDNode *node, int level = 0);
     1.7 -static bool build_kdtree(KDNode *kd, int level = 0);
     1.8 -static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea = 1.0);
     1.9 +static bool build_kdtree(KDNode *kd, const Face *faces, int level = 0);
    1.10 +static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis);
    1.11  static void free_kdtree(KDNode *node);
    1.12 -static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf);
    1.13 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node);
    1.14  static void print_item_counts(const KDNode *node, int level);
    1.15  
    1.16  
    1.17 @@ -58,10 +58,8 @@
    1.18  
    1.19  KDNode::KDNode()
    1.20  {
    1.21 -	axis = 0;
    1.22 -	pt = 0.0;
    1.23  	left = right = 0;
    1.24 -	num_faces = 0;
    1.25 +	cost = 0.0;
    1.26  }
    1.27  
    1.28  
    1.29 @@ -175,7 +173,7 @@
    1.30  	printf("allocating storage for the complete tree (%d)\n", max_nodes);
    1.31  
    1.32  	kdbuf = new KDNodeGPU[max_nodes + 1];
    1.33 -	kdtree_gpu_flatten(kdbuf, 1, kdtree, get_face_buffer());
    1.34 +	kdtree_gpu_flatten(kdbuf, 1, kdtree);
    1.35  	return kdbuf;
    1.36  }
    1.37  
    1.38 @@ -298,15 +296,14 @@
    1.39  			}
    1.40  		}
    1.41  
    1.42 -		kdtree->faces.push_back(face);	// add the face
    1.43 -		kdtree->num_faces++;
    1.44 +		kdtree->face_idx.push_back(i);	// add the face
    1.45  	}
    1.46  
    1.47  	// calculate the heuristic for the root
    1.48 -	kdtree->cost = eval_cost(kdtree->faces, kdtree->aabb, kdtree->axis);
    1.49 +	kdtree->cost = eval_cost(faces, &kdtree->face_idx[0], kdtree->face_idx.size(), kdtree->aabb, 0);
    1.50  
    1.51  	// now proceed splitting the root recursively
    1.52 -	if(!::build_kdtree(kdtree)) {
    1.53 +	if(!::build_kdtree(kdtree, faces)) {
    1.54  		fprintf(stderr, "failed to build kdtree\n");
    1.55  		return false;
    1.56  	}
    1.57 @@ -316,36 +313,34 @@
    1.58  	return true;
    1.59  }
    1.60  
    1.61 -static bool build_kdtree(KDNode *kd, int level)
    1.62 +static bool build_kdtree(KDNode *kd, const Face *faces, int level)
    1.63  {
    1.64  	int opt_max_depth = accel_param[ACCEL_PARAM_MAX_TREE_DEPTH];
    1.65  	int opt_max_items = accel_param[ACCEL_PARAM_MAX_NODE_ITEMS];
    1.66  	int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
    1.67  
    1.68 -	if(kd->num_faces == 0 || level >= opt_max_depth) {
    1.69 +	if(kd->face_idx.empty() || level >= opt_max_depth) {
    1.70  		return true;
    1.71  	}
    1.72  
    1.73  	int axis = level % 3;
    1.74 -	//float parent_sa = kd->aabb.calc_surface_area();
    1.75  
    1.76  	float best_cost[2], best_sum_cost = FLT_MAX;
    1.77  	float best_split;
    1.78  
    1.79 -	std::list<const Face*>::iterator it = kd->faces.begin();
    1.80 -	while(it != kd->faces.end()) {
    1.81 -		const Face *face = *it++;
    1.82 +	for(size_t i=0; i<kd->face_idx.size(); i++) {
    1.83 +		const Face *face = faces + kd->face_idx[i];
    1.84  
    1.85 -		for(int i=0; i<3; i++) {
    1.86 +		for(int j=0; j<3; j++) {
    1.87  			AABBox aabb_left, aabb_right;
    1.88 -			const float *split = face->v[i].pos;
    1.89 +			const float *split = face->v[j].pos;
    1.90  
    1.91  			aabb_left = aabb_right = kd->aabb;
    1.92  			aabb_left.max[axis] = split[axis];
    1.93  			aabb_right.min[axis] = split[axis];
    1.94  
    1.95 -			float left_cost = eval_cost(kd->faces, aabb_left, axis);
    1.96 -			float right_cost = eval_cost(kd->faces, aabb_right, axis);
    1.97 +			float left_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_left, axis);
    1.98 +			float right_cost = eval_cost(faces, &kd->face_idx[0], kd->face_idx.size(), aabb_right, axis);
    1.99  			float sum_cost = left_cost + right_cost - tcost;	// tcost is added twice
   1.100  
   1.101  			if(sum_cost < best_sum_cost) {
   1.102 @@ -358,10 +353,9 @@
   1.103  	}
   1.104  
   1.105  	//printf("current cost: %f,   best_cost: %f\n", kd->cost, best_sum_cost);
   1.106 -	if(best_sum_cost > kd->cost && (opt_max_items == 0 || kd->num_faces <= opt_max_items)) {
   1.107 +	if(best_sum_cost > kd->cost && (opt_max_items == 0 || (int)kd->face_idx.size() <= opt_max_items)) {
   1.108  		return true;	// stop splitting if it doesn't reduce the cost
   1.109  	}
   1.110 -	kd->pt = best_split;
   1.111  
   1.112  	// create the two children
   1.113  	KDNode *kdleft, *kdright;
   1.114 @@ -376,45 +370,40 @@
   1.115  	kdleft->cost = best_cost[0];
   1.116  	kdright->cost = best_cost[1];
   1.117  
   1.118 -	//kdleft->axis = kdright->axis = (axis + 1) % 3;
   1.119 -
   1.120 -	it = kd->faces.begin();
   1.121 -	while(it != kd->faces.end()) {
   1.122 -		const Face *face = *it++;
   1.123 +	for(size_t i=0; i<kd->face_idx.size(); i++) {
   1.124 +		int fidx = kd->face_idx[i];
   1.125 +		const Face *face = faces + fidx;
   1.126  
   1.127  		if(face->v[0].pos[axis] < best_split ||
   1.128  				face->v[1].pos[axis] < best_split ||
   1.129  				face->v[2].pos[axis] < best_split) {
   1.130 -			kdleft->faces.push_back(face);
   1.131 -			kdleft->num_faces++;
   1.132 +			kdleft->face_idx.push_back(fidx);
   1.133  		}
   1.134  		if(face->v[0].pos[axis] >= best_split ||
   1.135  				face->v[1].pos[axis] >= best_split ||
   1.136  				face->v[2].pos[axis] >= best_split) {
   1.137 -			kdright->faces.push_back(face);
   1.138 -			kdright->num_faces++;
   1.139 +			kdright->face_idx.push_back(fidx);
   1.140  		}
   1.141  	}
   1.142 -	kd->faces.clear();	// only leaves have faces
   1.143 +	kd->face_idx.clear();	// only leaves have faces
   1.144  
   1.145  	kd->left = kdleft;
   1.146  	kd->right = kdright;
   1.147  
   1.148 -	return build_kdtree(kd->left, level + 1) && build_kdtree(kd->right, level + 1);
   1.149 +	return build_kdtree(kd->left, faces, level + 1) && build_kdtree(kd->right, faces, level + 1);
   1.150  }
   1.151  
   1.152 -static float eval_cost(const std::list<const Face*> &faces, const AABBox &aabb, int axis, float par_sarea)
   1.153 +static float eval_cost(const Face *faces, const int *face_idx, int num_faces, const AABBox &aabb, int axis)
   1.154  {
   1.155  	int num_inside = 0;
   1.156  	int tcost = accel_param[ACCEL_PARAM_COST_TRAVERSE];
   1.157  	int icost = accel_param[ACCEL_PARAM_COST_INTERSECT];
   1.158  
   1.159 -	std::list<const Face*>::const_iterator it = faces.begin();
   1.160 -	while(it != faces.end()) {
   1.161 -		const Face *face = *it++;
   1.162 +	for(int i=0; i<num_faces; i++) {
   1.163 +		const Face *face = faces + face_idx[i];
   1.164  
   1.165 -		for(int i=0; i<3; i++) {
   1.166 -			if(face->v[i].pos[axis] >= aabb.min[axis] && face->v[i].pos[axis] < aabb.max[axis]) {
   1.167 +		for(int j=0; j<3; j++) {
   1.168 +			if(face->v[j].pos[axis] >= aabb.min[axis] && face->v[j].pos[axis] < aabb.max[axis]) {
   1.169  				num_inside++;
   1.170  				break;
   1.171  			}
   1.172 @@ -426,7 +415,7 @@
   1.173  		return FLT_MAX;	// heavily penalize 0-area voxels
   1.174  	}
   1.175  
   1.176 -	return tcost + (sarea / par_sarea) * num_inside * icost;
   1.177 +	return tcost + sarea * num_inside * icost;
   1.178  }
   1.179  
   1.180  static void free_kdtree(KDNode *node)
   1.181 @@ -454,20 +443,19 @@
   1.182  }
   1.183  
   1.184  #define MAX_FACES	(sizeof dest->face_idx / sizeof *dest->face_idx)
   1.185 -static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node, const Face *facebuf)
   1.186 +static void kdtree_gpu_flatten(KDNodeGPU *kdbuf, int idx, const KDNode *node)
   1.187  {
   1.188  	KDNodeGPU *dest = kdbuf + idx;
   1.189  
   1.190  	dest->aabb = node->aabb;
   1.191  	dest->num_faces = 0;
   1.192  
   1.193 -	std::list<const Face*>::const_iterator it = node->faces.begin();
   1.194 -	while(it != node->faces.end()) {
   1.195 +	for(size_t i=0; i<node->face_idx.size(); i++) {
   1.196  		if(dest->num_faces >= (int)MAX_FACES) {
   1.197  			fprintf(stderr, "kdtree_gpu_flatten WARNING: more than %d faces in node, skipping!\n", (int)MAX_FACES);
   1.198  			break;
   1.199  		}
   1.200 -		dest->face_idx[dest->num_faces++] = *it++ - facebuf;
   1.201 +		dest->face_idx[dest->num_faces++] = node->face_idx[i];
   1.202  	}
   1.203  
   1.204  	if(node->left) {
   1.205 @@ -476,8 +464,8 @@
   1.206  
   1.207  		dest->num_faces = -1;
   1.208  
   1.209 -		kdtree_gpu_flatten(kdbuf, idx * 2, node->left, facebuf);
   1.210 -		kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right, facebuf);
   1.211 +		kdtree_gpu_flatten(kdbuf, idx * 2, node->left);
   1.212 +		kdtree_gpu_flatten(kdbuf, idx * 2 + 1, node->right);
   1.213  	}
   1.214  }
   1.215  
   1.216 @@ -488,7 +476,7 @@
   1.217  	for(int i=0; i<level; i++) {
   1.218  		fputs("   ", stdout);
   1.219  	}
   1.220 -	printf("- %d (cost: %f)\n", node->num_faces, node->cost);
   1.221 +	printf("- %d (cost: %f)\n", (int)node->face_idx.size(), node->cost);
   1.222  
   1.223  	print_item_counts(node->left, level + 1);
   1.224  	print_item_counts(node->right, level + 1);