vrshoot
diff libs/assimp/ImproveCacheLocality.cpp @ 0:b2f14e535253
initial commit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 01 Feb 2014 19:58:19 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libs/assimp/ImproveCacheLocality.cpp Sat Feb 01 19:58:19 2014 +0200 1.3 @@ -0,0 +1,380 @@ 1.4 +/* 1.5 +--------------------------------------------------------------------------- 1.6 +Open Asset Import Library (assimp) 1.7 +--------------------------------------------------------------------------- 1.8 + 1.9 +Copyright (c) 2006-2012, assimp team 1.10 + 1.11 +All rights reserved. 1.12 + 1.13 +Redistribution and use of this software in source and binary forms, 1.14 +with or without modification, are permitted provided that the following 1.15 +conditions are met: 1.16 + 1.17 +* Redistributions of source code must retain the above 1.18 + copyright notice, this list of conditions and the 1.19 + following disclaimer. 1.20 + 1.21 +* Redistributions in binary form must reproduce the above 1.22 + copyright notice, this list of conditions and the 1.23 + following disclaimer in the documentation and/or other 1.24 + materials provided with the distribution. 1.25 + 1.26 +* Neither the name of the assimp team, nor the names of its 1.27 + contributors may be used to endorse or promote products 1.28 + derived from this software without specific prior 1.29 + written permission of the assimp team. 1.30 + 1.31 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.32 +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.33 +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.34 +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.35 +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.36 +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.37 +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.38 +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.39 +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.40 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.41 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.42 +--------------------------------------------------------------------------- 1.43 +*/ 1.44 + 1.45 +/** @file Implementation of the post processing step to improve the cache locality of a mesh. 1.46 + * <br> 1.47 + * The algorithm is roughly basing on this paper: 1.48 + * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf 1.49 + * .. although overdraw rduction isn't implemented yet ... 1.50 + */ 1.51 + 1.52 +#include "AssimpPCH.h" 1.53 + 1.54 +// internal headers 1.55 +#include "ImproveCacheLocality.h" 1.56 +#include "VertexTriangleAdjacency.h" 1.57 + 1.58 +using namespace Assimp; 1.59 + 1.60 +// ------------------------------------------------------------------------------------------------ 1.61 +// Constructor to be privately used by Importer 1.62 +ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() { 1.63 + configCacheDepth = PP_ICL_PTCACHE_SIZE; 1.64 +} 1.65 + 1.66 +// ------------------------------------------------------------------------------------------------ 1.67 +// Destructor, private as well 1.68 +ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() 1.69 +{ 1.70 + // nothing to do here 1.71 +} 1.72 + 1.73 +// ------------------------------------------------------------------------------------------------ 1.74 +// Returns whether the processing step is present in the given flag field. 1.75 +bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const 1.76 +{ 1.77 + return (pFlags & aiProcess_ImproveCacheLocality) != 0; 1.78 +} 1.79 + 1.80 +// ------------------------------------------------------------------------------------------------ 1.81 +// Setup configuration 1.82 +void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) 1.83 +{ 1.84 + // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer 1.85 + configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE); 1.86 +} 1.87 + 1.88 +// ------------------------------------------------------------------------------------------------ 1.89 +// Executes the post processing step on the given imported data. 1.90 +void ImproveCacheLocalityProcess::Execute( aiScene* pScene) 1.91 +{ 1.92 + if (!pScene->mNumMeshes) { 1.93 + DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes"); 1.94 + return; 1.95 + } 1.96 + 1.97 + DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin"); 1.98 + 1.99 + float out = 0.f; 1.100 + unsigned int numf = 0, numm = 0; 1.101 + for( unsigned int a = 0; a < pScene->mNumMeshes; a++){ 1.102 + const float res = ProcessMesh( pScene->mMeshes[a],a); 1.103 + if (res) { 1.104 + numf += pScene->mMeshes[a]->mNumFaces; 1.105 + out += res; 1.106 + ++numm; 1.107 + } 1.108 + } 1.109 + if (!DefaultLogger::isNullLogger()) { 1.110 + char szBuff[128]; // should be sufficiently large in every case 1.111 + ::sprintf(szBuff,"Cache relevant are %i meshes (%i faces). Average output ACMR is %f", 1.112 + numm,numf,out/numf); 1.113 + 1.114 + DefaultLogger::get()->info(szBuff); 1.115 + DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. "); 1.116 + } 1.117 +} 1.118 + 1.119 +// ------------------------------------------------------------------------------------------------ 1.120 +// Improves the cache coherency of a specific mesh 1.121 +float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) 1.122 +{ 1.123 + // TODO: rewrite this to use std::vector or boost::shared_array 1.124 + ai_assert(NULL != pMesh); 1.125 + 1.126 + // Check whether the input data is valid 1.127 + // - there must be vertices and faces 1.128 + // - all faces must be triangulated or we can't operate on them 1.129 + if (!pMesh->HasFaces() || !pMesh->HasPositions()) 1.130 + return 0.f; 1.131 + 1.132 + if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) { 1.133 + DefaultLogger::get()->error("This algorithm works on triangle meshes only"); 1.134 + return 0.f; 1.135 + } 1.136 + 1.137 + if(pMesh->mNumVertices <= configCacheDepth) { 1.138 + return 0.f; 1.139 + } 1.140 + 1.141 + float fACMR = 3.f; 1.142 + const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces; 1.143 + 1.144 + // Input ACMR is for logging purposes only 1.145 + if (!DefaultLogger::isNullLogger()) { 1.146 + 1.147 + unsigned int* piFIFOStack = new unsigned int[configCacheDepth]; 1.148 + memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int)); 1.149 + unsigned int* piCur = piFIFOStack; 1.150 + const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth; 1.151 + 1.152 + // count the number of cache misses 1.153 + unsigned int iCacheMisses = 0; 1.154 + for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) { 1.155 + 1.156 + for (unsigned int qq = 0; qq < 3;++qq) { 1.157 + bool bInCache = false; 1.158 + 1.159 + for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) { 1.160 + if (*pp == pcFace->mIndices[qq]) { 1.161 + // the vertex is in cache 1.162 + bInCache = true; 1.163 + break; 1.164 + } 1.165 + } 1.166 + if (!bInCache) { 1.167 + ++iCacheMisses; 1.168 + if (piCurEnd == piCur) { 1.169 + piCur = piFIFOStack; 1.170 + } 1.171 + *piCur++ = pcFace->mIndices[qq]; 1.172 + } 1.173 + } 1.174 + } 1.175 + delete[] piFIFOStack; 1.176 + fACMR = (float)iCacheMisses / pMesh->mNumFaces; 1.177 + if (3.0 == fACMR) { 1.178 + char szBuff[128]; // should be sufficiently large in every case 1.179 + 1.180 + // the JoinIdenticalVertices process has not been executed on this 1.181 + // mesh, otherwise this value would normally be at least minimally 1.182 + // smaller than 3.0 ... 1.183 + sprintf(szBuff,"Mesh %i: Not suitable for vcache optimization",meshNum); 1.184 + DefaultLogger::get()->warn(szBuff); 1.185 + return 0.f; 1.186 + } 1.187 + } 1.188 + 1.189 + // first we need to build a vertex-triangle adjacency list 1.190 + VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true); 1.191 + 1.192 + // build a list to store per-vertex caching time stamps 1.193 + unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices]; 1.194 + memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int)); 1.195 + 1.196 + // allocate an empty output index buffer. We store the output indices in one large array. 1.197 + // Since the number of triangles won't change the input faces can be reused. This is how 1.198 + // we save thousands of redundant mini allocations for aiFace::mIndices 1.199 + const unsigned int iIdxCnt = pMesh->mNumFaces*3; 1.200 + unsigned int* const piIBOutput = new unsigned int[iIdxCnt]; 1.201 + unsigned int* piCSIter = piIBOutput; 1.202 + 1.203 + // allocate the flag array to hold the information 1.204 + // whether a face has already been emitted or not 1.205 + std::vector<bool> abEmitted(pMesh->mNumFaces,false); 1.206 + 1.207 + // dead-end vertex index stack 1.208 + std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack; 1.209 + 1.210 + // create a copy of the piNumTriPtr buffer 1.211 + unsigned int* const piNumTriPtr = adj.mLiveTriangles; 1.212 + const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices); 1.213 + 1.214 + // get the largest number of referenced triangles and allocate the "candidate buffer" 1.215 + unsigned int iMaxRefTris = 0; { 1.216 + const unsigned int* piCur = adj.mLiveTriangles; 1.217 + const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices; 1.218 + for (;piCur != piCurEnd;++piCur) { 1.219 + iMaxRefTris = std::max(iMaxRefTris,*piCur); 1.220 + } 1.221 + } 1.222 + unsigned int* piCandidates = new unsigned int[iMaxRefTris*3]; 1.223 + unsigned int iCacheMisses = 0; 1.224 + 1.225 + // ................................................................................... 1.226 + /** PSEUDOCODE for the algorithm 1.227 + 1.228 + A = Build-Adjacency(I) Vertex-triangle adjacency 1.229 + L = Get-Triangle-Counts(A) Per-vertex live triangle counts 1.230 + C = Zero(Vertex-Count(I)) Per-vertex caching time stamps 1.231 + D = Empty-Stack() Dead-end vertex stack 1.232 + E = False(Triangle-Count(I)) Per triangle emitted flag 1.233 + O = Empty-Index-Buffer() Empty output buffer 1.234 + f = 0 Arbitrary starting vertex 1.235 + s = k+1, i = 1 Time stamp and cursor 1.236 + while f >= 0 For all valid fanning vertices 1.237 + N = Empty-Set() 1-ring of next candidates 1.238 + for each Triangle t in Neighbors(A, f) 1.239 + if !Emitted(E,t) 1.240 + for each Vertex v in t 1.241 + Append(O,v) Output vertex 1.242 + Push(D,v) Add to dead-end stack 1.243 + Insert(N,v) Register as candidate 1.244 + L[v] = L[v]-1 Decrease live triangle count 1.245 + if s-C[v] > k If not in cache 1.246 + C[v] = s Set time stamp 1.247 + s = s+1 Increment time stamp 1.248 + E[t] = true Flag triangle as emitted 1.249 + Select next fanning vertex 1.250 + f = Get-Next-Vertex(I,i,k,N,C,s,L,D) 1.251 + return O 1.252 + */ 1.253 + // ................................................................................... 1.254 + 1.255 + int ivdx = 0; 1.256 + int ics = 1; 1.257 + int iStampCnt = configCacheDepth+1; 1.258 + while (ivdx >= 0) { 1.259 + 1.260 + unsigned int icnt = piNumTriPtrNoModify[ivdx]; 1.261 + unsigned int* piList = adj.GetAdjacentTriangles(ivdx); 1.262 + unsigned int* piCurCandidate = piCandidates; 1.263 + 1.264 + // get all triangles in the neighborhood 1.265 + for (unsigned int tri = 0; tri < icnt;++tri) { 1.266 + 1.267 + // if they have not yet been emitted, add them to the output IB 1.268 + const unsigned int fidx = *piList++; 1.269 + if (!abEmitted[fidx]) { 1.270 + 1.271 + // so iterate through all vertices of the current triangle 1.272 + const aiFace* pcFace = &pMesh->mFaces[ fidx ]; 1.273 + for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) { 1.274 + const unsigned int dp = *p; 1.275 + 1.276 + // the current vertex won't have any free triangles after this step 1.277 + if (ivdx != (int)dp) { 1.278 + // append the vertex to the dead-end stack 1.279 + sDeadEndVStack.push(dp); 1.280 + 1.281 + // register as candidate for the next step 1.282 + *piCurCandidate++ = dp; 1.283 + 1.284 + // decrease the per-vertex triangle counts 1.285 + piNumTriPtr[dp]--; 1.286 + } 1.287 + 1.288 + // append the vertex to the output index buffer 1.289 + *piCSIter++ = dp; 1.290 + 1.291 + // if the vertex is not yet in cache, set its cache count 1.292 + if (iStampCnt-piCachingStamps[dp] > configCacheDepth) { 1.293 + piCachingStamps[dp] = iStampCnt++; 1.294 + ++iCacheMisses; 1.295 + } 1.296 + } 1.297 + // flag triangle as emitted 1.298 + abEmitted[fidx] = true; 1.299 + } 1.300 + } 1.301 + 1.302 + // the vertex has now no living adjacent triangles anymore 1.303 + piNumTriPtr[ivdx] = 0; 1.304 + 1.305 + // get next fanning vertex 1.306 + ivdx = -1; 1.307 + int max_priority = -1; 1.308 + for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) { 1.309 + register const unsigned int dp = *piCur; 1.310 + 1.311 + // must have live triangles 1.312 + if (piNumTriPtr[dp] > 0) { 1.313 + int priority = 0; 1.314 + 1.315 + // will the vertex be in cache, even after fanning occurs? 1.316 + unsigned int tmp; 1.317 + if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) { 1.318 + priority = tmp; 1.319 + } 1.320 + 1.321 + // keep best candidate 1.322 + if (priority > max_priority) { 1.323 + max_priority = priority; 1.324 + ivdx = dp; 1.325 + } 1.326 + } 1.327 + } 1.328 + // did we reach a dead end? 1.329 + if (-1 == ivdx) { 1.330 + // need to get a non-local vertex for which we have a good chance that it is still 1.331 + // in the cache ... 1.332 + while (!sDeadEndVStack.empty()) { 1.333 + unsigned int iCachedIdx = sDeadEndVStack.top(); 1.334 + sDeadEndVStack.pop(); 1.335 + if (piNumTriPtr[ iCachedIdx ] > 0) { 1.336 + ivdx = iCachedIdx; 1.337 + break; 1.338 + } 1.339 + } 1.340 + 1.341 + if (-1 == ivdx) { 1.342 + // well, there isn't such a vertex. Simply get the next vertex in input order and 1.343 + // hope it is not too bad ... 1.344 + while (ics < (int)pMesh->mNumVertices) { 1.345 + ++ics; 1.346 + if (piNumTriPtr[ics] > 0) { 1.347 + ivdx = ics; 1.348 + break; 1.349 + } 1.350 + } 1.351 + } 1.352 + } 1.353 + } 1.354 + float fACMR2 = 0.0f; 1.355 + if (!DefaultLogger::isNullLogger()) { 1.356 + fACMR2 = (float)iCacheMisses / pMesh->mNumFaces; 1.357 + 1.358 + // very intense verbose logging ... prepare for much text if there are many meshes 1.359 + if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { 1.360 + char szBuff[128]; // should be sufficiently large in every case 1.361 + 1.362 + ::sprintf(szBuff,"Mesh %i | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2, 1.363 + ((fACMR - fACMR2) / fACMR) * 100.f); 1.364 + DefaultLogger::get()->debug(szBuff); 1.365 + } 1.366 + 1.367 + fACMR2 *= pMesh->mNumFaces; 1.368 + } 1.369 + // sort the output index buffer back to the input array 1.370 + piCSIter = piIBOutput; 1.371 + for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) { 1.372 + pcFace->mIndices[0] = *piCSIter++; 1.373 + pcFace->mIndices[1] = *piCSIter++; 1.374 + pcFace->mIndices[2] = *piCSIter++; 1.375 + } 1.376 + 1.377 + // delete temporary storage 1.378 + delete[] piCachingStamps; 1.379 + delete[] piIBOutput; 1.380 + delete[] piCandidates; 1.381 + 1.382 + return fACMR2; 1.383 +}