vrshoot

diff libs/assimp/ImproveCacheLocality.cpp @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libs/assimp/ImproveCacheLocality.cpp	Sat Feb 01 19:58:19 2014 +0200
     1.3 @@ -0,0 +1,380 @@
     1.4 +/*
     1.5 +---------------------------------------------------------------------------
     1.6 +Open Asset Import Library (assimp)
     1.7 +---------------------------------------------------------------------------
     1.8 +
     1.9 +Copyright (c) 2006-2012, assimp team
    1.10 +
    1.11 +All rights reserved.
    1.12 +
    1.13 +Redistribution and use of this software in source and binary forms, 
    1.14 +with or without modification, are permitted provided that the following 
    1.15 +conditions are met:
    1.16 +
    1.17 +* Redistributions of source code must retain the above
    1.18 +  copyright notice, this list of conditions and the
    1.19 +  following disclaimer.
    1.20 +
    1.21 +* Redistributions in binary form must reproduce the above
    1.22 +  copyright notice, this list of conditions and the
    1.23 +  following disclaimer in the documentation and/or other
    1.24 +  materials provided with the distribution.
    1.25 +
    1.26 +* Neither the name of the assimp team, nor the names of its
    1.27 +  contributors may be used to endorse or promote products
    1.28 +  derived from this software without specific prior
    1.29 +  written permission of the assimp team.
    1.30 +
    1.31 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
    1.32 +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
    1.33 +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.34 +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
    1.35 +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.36 +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
    1.37 +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.38 +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
    1.39 +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    1.40 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
    1.41 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.42 +---------------------------------------------------------------------------
    1.43 +*/
    1.44 +
    1.45 +/** @file Implementation of the post processing step to improve the cache locality of a mesh.
    1.46 + * <br>
    1.47 + * The algorithm is roughly basing on this paper:
    1.48 + * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
    1.49 + *   .. although overdraw rduction isn't implemented yet ...
    1.50 + */
    1.51 +
    1.52 +#include "AssimpPCH.h"
    1.53 +
    1.54 +// internal headers
    1.55 +#include "ImproveCacheLocality.h"
    1.56 +#include "VertexTriangleAdjacency.h"
    1.57 +
    1.58 +using namespace Assimp;
    1.59 +
    1.60 +// ------------------------------------------------------------------------------------------------
    1.61 +// Constructor to be privately used by Importer
    1.62 +ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
    1.63 +	configCacheDepth = PP_ICL_PTCACHE_SIZE;
    1.64 +}
    1.65 +
    1.66 +// ------------------------------------------------------------------------------------------------
    1.67 +// Destructor, private as well
    1.68 +ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
    1.69 +{
    1.70 +	// nothing to do here
    1.71 +}
    1.72 +
    1.73 +// ------------------------------------------------------------------------------------------------
    1.74 +// Returns whether the processing step is present in the given flag field.
    1.75 +bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
    1.76 +{
    1.77 +	return (pFlags & aiProcess_ImproveCacheLocality) != 0;
    1.78 +}
    1.79 +
    1.80 +// ------------------------------------------------------------------------------------------------
    1.81 +// Setup configuration
    1.82 +void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
    1.83 +{
    1.84 +	// AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
    1.85 +	configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
    1.86 +}
    1.87 +
    1.88 +// ------------------------------------------------------------------------------------------------
    1.89 +// Executes the post processing step on the given imported data.
    1.90 +void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
    1.91 +{
    1.92 +	if (!pScene->mNumMeshes) {
    1.93 +		DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
    1.94 +		return;
    1.95 +	}
    1.96 +
    1.97 +	DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
    1.98 +
    1.99 +	float out = 0.f;
   1.100 +	unsigned int numf = 0, numm = 0;
   1.101 +	for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
   1.102 +		const float res = ProcessMesh( pScene->mMeshes[a],a);
   1.103 +		if (res) {
   1.104 +			numf += pScene->mMeshes[a]->mNumFaces;
   1.105 +			out  += res;
   1.106 +			++numm;
   1.107 +		}
   1.108 +	}
   1.109 +	if (!DefaultLogger::isNullLogger()) {
   1.110 +		char szBuff[128]; // should be sufficiently large in every case
   1.111 +		::sprintf(szBuff,"Cache relevant are %i meshes (%i faces). Average output ACMR is %f",
   1.112 +			numm,numf,out/numf);
   1.113 +
   1.114 +		DefaultLogger::get()->info(szBuff);
   1.115 +		DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
   1.116 +	}
   1.117 +}
   1.118 +
   1.119 +// ------------------------------------------------------------------------------------------------
   1.120 +// Improves the cache coherency of a specific mesh
   1.121 +float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
   1.122 +{
   1.123 +	// TODO: rewrite this to use std::vector or boost::shared_array
   1.124 +	ai_assert(NULL != pMesh);
   1.125 +
   1.126 +	// Check whether the input data is valid
   1.127 +	// - there must be vertices and faces 
   1.128 +	// - all faces must be triangulated or we can't operate on them
   1.129 +	if (!pMesh->HasFaces() || !pMesh->HasPositions())
   1.130 +		return 0.f;
   1.131 +
   1.132 +	if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE)	{
   1.133 +		DefaultLogger::get()->error("This algorithm works on triangle meshes only");
   1.134 +		return 0.f;
   1.135 +	}
   1.136 +
   1.137 +	if(pMesh->mNumVertices <= configCacheDepth) {
   1.138 +		return 0.f;
   1.139 +	}
   1.140 +
   1.141 +	float fACMR = 3.f;
   1.142 +	const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
   1.143 +
   1.144 +	// Input ACMR is for logging purposes only
   1.145 +	if (!DefaultLogger::isNullLogger())		{
   1.146 +
   1.147 +		unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
   1.148 +		memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
   1.149 +		unsigned int* piCur = piFIFOStack;
   1.150 +		const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
   1.151 +
   1.152 +		// count the number of cache misses
   1.153 +		unsigned int iCacheMisses = 0;
   1.154 +		for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace)	{
   1.155 +
   1.156 +			for (unsigned int qq = 0; qq < 3;++qq) {
   1.157 +				bool bInCache = false;
   1.158 +
   1.159 +				for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp)	{
   1.160 +					if (*pp == pcFace->mIndices[qq])	{
   1.161 +						// the vertex is in cache
   1.162 +						bInCache = true;
   1.163 +						break;
   1.164 +					}
   1.165 +				}
   1.166 +				if (!bInCache)	{
   1.167 +					++iCacheMisses;
   1.168 +					if (piCurEnd == piCur) {
   1.169 +						piCur = piFIFOStack;
   1.170 +					}
   1.171 +					*piCur++ = pcFace->mIndices[qq];
   1.172 +				}
   1.173 +			}
   1.174 +		}
   1.175 +		delete[] piFIFOStack;
   1.176 +		fACMR = (float)iCacheMisses / pMesh->mNumFaces;
   1.177 +		if (3.0 == fACMR)	{
   1.178 +			char szBuff[128]; // should be sufficiently large in every case
   1.179 +
   1.180 +			// the JoinIdenticalVertices process has not been executed on this
   1.181 +			// mesh, otherwise this value would normally be at least minimally
   1.182 +			// smaller than 3.0 ...
   1.183 +			sprintf(szBuff,"Mesh %i: Not suitable for vcache optimization",meshNum);
   1.184 +			DefaultLogger::get()->warn(szBuff);
   1.185 +			return 0.f;
   1.186 +		}
   1.187 +	}
   1.188 +
   1.189 +	// first we need to build a vertex-triangle adjacency list
   1.190 +	VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
   1.191 +
   1.192 +	// build a list to store per-vertex caching time stamps
   1.193 +	unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
   1.194 +	memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
   1.195 +
   1.196 +	// allocate an empty output index buffer. We store the output indices in one large array.
   1.197 +	// Since the number of triangles won't change the input faces can be reused. This is how 
   1.198 +	// we save thousands of redundant mini allocations for aiFace::mIndices
   1.199 +	const unsigned int iIdxCnt = pMesh->mNumFaces*3;
   1.200 +	unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
   1.201 +	unsigned int* piCSIter = piIBOutput;
   1.202 +
   1.203 +	// allocate the flag array to hold the information
   1.204 +	// whether a face has already been emitted or not
   1.205 +	std::vector<bool> abEmitted(pMesh->mNumFaces,false);
   1.206 +
   1.207 +	// dead-end vertex index stack
   1.208 +	std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
   1.209 +
   1.210 +	// create a copy of the piNumTriPtr buffer
   1.211 +	unsigned int* const piNumTriPtr = adj.mLiveTriangles;
   1.212 +	const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
   1.213 +	
   1.214 +	// get the largest number of referenced triangles and allocate the "candidate buffer"
   1.215 +	unsigned int iMaxRefTris = 0; {
   1.216 +		const unsigned int* piCur = adj.mLiveTriangles;
   1.217 +		const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
   1.218 +		for (;piCur != piCurEnd;++piCur) {
   1.219 +			iMaxRefTris = std::max(iMaxRefTris,*piCur);
   1.220 +		}
   1.221 +	}
   1.222 +	unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
   1.223 +	unsigned int iCacheMisses = 0;
   1.224 +
   1.225 +	// ...................................................................................
   1.226 +	/** PSEUDOCODE for the algorithm
   1.227 +
   1.228 +		A = Build-Adjacency(I) Vertex-triangle adjacency
   1.229 +		L = Get-Triangle-Counts(A) Per-vertex live triangle counts
   1.230 +		C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
   1.231 +		D = Empty-Stack() Dead-end vertex stack
   1.232 +		E = False(Triangle-Count(I)) Per triangle emitted flag
   1.233 +		O = Empty-Index-Buffer() Empty output buffer
   1.234 +		f = 0 Arbitrary starting vertex
   1.235 +		s = k+1, i = 1 Time stamp and cursor
   1.236 +		while f >= 0 For all valid fanning vertices
   1.237 +			N = Empty-Set() 1-ring of next candidates
   1.238 +			for each Triangle t in Neighbors(A, f)
   1.239 +				if !Emitted(E,t)
   1.240 +					for each Vertex v in t
   1.241 +						Append(O,v) Output vertex
   1.242 +						Push(D,v) Add to dead-end stack
   1.243 +						Insert(N,v) Register as candidate
   1.244 +						L[v] = L[v]-1 Decrease live triangle count
   1.245 +						if s-C[v] > k If not in cache
   1.246 +							C[v] = s Set time stamp
   1.247 +							s = s+1 Increment time stamp
   1.248 +					E[t] = true Flag triangle as emitted
   1.249 +			Select next fanning vertex
   1.250 +			f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
   1.251 +		return O
   1.252 +		*/
   1.253 +	// ...................................................................................
   1.254 +
   1.255 +	int ivdx = 0;
   1.256 +	int ics = 1;
   1.257 +	int iStampCnt = configCacheDepth+1;
   1.258 +	while (ivdx >= 0)	{
   1.259 +
   1.260 +		unsigned int icnt = piNumTriPtrNoModify[ivdx]; 
   1.261 +		unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
   1.262 +		unsigned int* piCurCandidate = piCandidates;
   1.263 +
   1.264 +		// get all triangles in the neighborhood
   1.265 +		for (unsigned int tri = 0; tri < icnt;++tri)	{
   1.266 +
   1.267 +			// if they have not yet been emitted, add them to the output IB
   1.268 +			const unsigned int fidx = *piList++;
   1.269 +			if (!abEmitted[fidx])	{
   1.270 +
   1.271 +				// so iterate through all vertices of the current triangle
   1.272 +				const aiFace* pcFace = &pMesh->mFaces[ fidx ];
   1.273 +				for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p)	{
   1.274 +					const unsigned int dp = *p;
   1.275 +
   1.276 +					// the current vertex won't have any free triangles after this step
   1.277 +					if (ivdx != (int)dp) {
   1.278 +						// append the vertex to the dead-end stack
   1.279 +						sDeadEndVStack.push(dp);
   1.280 +
   1.281 +						// register as candidate for the next step
   1.282 +						*piCurCandidate++ = dp;
   1.283 +
   1.284 +						// decrease the per-vertex triangle counts
   1.285 +						piNumTriPtr[dp]--;
   1.286 +					}
   1.287 +
   1.288 +					// append the vertex to the output index buffer
   1.289 +					*piCSIter++ = dp;
   1.290 +
   1.291 +					// if the vertex is not yet in cache, set its cache count
   1.292 +					if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
   1.293 +						piCachingStamps[dp] = iStampCnt++;
   1.294 +						++iCacheMisses;
   1.295 +					}
   1.296 +				}
   1.297 +				// flag triangle as emitted
   1.298 +				abEmitted[fidx] = true;
   1.299 +			}
   1.300 +		}
   1.301 +
   1.302 +		// the vertex has now no living adjacent triangles anymore
   1.303 +		piNumTriPtr[ivdx] = 0;
   1.304 +
   1.305 +		// get next fanning vertex
   1.306 +		ivdx = -1; 
   1.307 +		int max_priority = -1;
   1.308 +		for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur)	{
   1.309 +			register const unsigned int dp = *piCur;
   1.310 +
   1.311 +			// must have live triangles
   1.312 +			if (piNumTriPtr[dp] > 0)	{
   1.313 +				int priority = 0;
   1.314 +
   1.315 +				// will the vertex be in cache, even after fanning occurs?
   1.316 +				unsigned int tmp;
   1.317 +				if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
   1.318 +					priority = tmp;
   1.319 +				}
   1.320 +
   1.321 +				// keep best candidate
   1.322 +				if (priority > max_priority) {
   1.323 +					max_priority = priority;
   1.324 +					ivdx = dp;
   1.325 +				}
   1.326 +			}
   1.327 +		}
   1.328 +		// did we reach a dead end?
   1.329 +		if (-1 == ivdx)	{
   1.330 +			// need to get a non-local vertex for which we have a good chance that it is still 
   1.331 +			// in the cache ...
   1.332 +			while (!sDeadEndVStack.empty())	{
   1.333 +				unsigned int iCachedIdx = sDeadEndVStack.top();
   1.334 +				sDeadEndVStack.pop();
   1.335 +				if (piNumTriPtr[ iCachedIdx ] > 0)	{
   1.336 +					ivdx = iCachedIdx;
   1.337 +					break;
   1.338 +				}
   1.339 +			}
   1.340 +
   1.341 +			if (-1 == ivdx)	{
   1.342 +				// well, there isn't such a vertex. Simply get the next vertex in input order and
   1.343 +				// hope it is not too bad ...
   1.344 +				while (ics < (int)pMesh->mNumVertices)	{
   1.345 +					++ics;
   1.346 +					if (piNumTriPtr[ics] > 0)	{
   1.347 +						ivdx = ics;
   1.348 +						break;
   1.349 +					}
   1.350 +				}
   1.351 +			}
   1.352 +		}
   1.353 +	}
   1.354 +	float fACMR2 = 0.0f;
   1.355 +	if (!DefaultLogger::isNullLogger()) {
   1.356 +		fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
   1.357 +
   1.358 +		// very intense verbose logging ... prepare for much text if there are many meshes
   1.359 +		if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
   1.360 +			char szBuff[128]; // should be sufficiently large in every case
   1.361 +
   1.362 +			::sprintf(szBuff,"Mesh %i | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
   1.363 +				((fACMR - fACMR2) / fACMR) * 100.f);
   1.364 +			DefaultLogger::get()->debug(szBuff);
   1.365 +		}
   1.366 +
   1.367 +		fACMR2 *= pMesh->mNumFaces;
   1.368 +	}
   1.369 +	// sort the output index buffer back to the input array
   1.370 +	piCSIter = piIBOutput;
   1.371 +	for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace)	{
   1.372 +		pcFace->mIndices[0] = *piCSIter++;
   1.373 +		pcFace->mIndices[1] = *piCSIter++;
   1.374 +		pcFace->mIndices[2] = *piCSIter++;
   1.375 +	}
   1.376 +
   1.377 +	// delete temporary storage
   1.378 +	delete[] piCachingStamps;
   1.379 +	delete[] piIBOutput;
   1.380 +	delete[] piCandidates;
   1.381 +
   1.382 +	return fACMR2;
   1.383 +}