nuclear@0: /* nuclear@0: --------------------------------------------------------------------------- nuclear@0: Open Asset Import Library (assimp) nuclear@0: --------------------------------------------------------------------------- nuclear@0: nuclear@0: Copyright (c) 2006-2012, assimp team nuclear@0: nuclear@0: All rights reserved. nuclear@0: nuclear@0: Redistribution and use of this software in source and binary forms, nuclear@0: with or without modification, are permitted provided that the following nuclear@0: conditions are met: nuclear@0: nuclear@0: * Redistributions of source code must retain the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer. nuclear@0: nuclear@0: * Redistributions in binary form must reproduce the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer in the documentation and/or other nuclear@0: materials provided with the distribution. nuclear@0: nuclear@0: * Neither the name of the assimp team, nor the names of its nuclear@0: contributors may be used to endorse or promote products nuclear@0: derived from this software without specific prior nuclear@0: written permission of the assimp team. nuclear@0: nuclear@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS nuclear@0: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT nuclear@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR nuclear@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT nuclear@0: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, nuclear@0: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT nuclear@0: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, nuclear@0: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY nuclear@0: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT nuclear@0: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE nuclear@0: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. nuclear@0: --------------------------------------------------------------------------- nuclear@0: */ nuclear@0: nuclear@0: /** @file Implementation of the post processing step to improve the cache locality of a mesh. nuclear@0: *
nuclear@0: * The algorithm is roughly basing on this paper: nuclear@0: * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf nuclear@0: * .. although overdraw rduction isn't implemented yet ... nuclear@0: */ nuclear@0: nuclear@0: #include "AssimpPCH.h" nuclear@0: nuclear@0: // internal headers nuclear@0: #include "ImproveCacheLocality.h" nuclear@0: #include "VertexTriangleAdjacency.h" nuclear@0: nuclear@0: using namespace Assimp; nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Constructor to be privately used by Importer nuclear@0: ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() { nuclear@0: configCacheDepth = PP_ICL_PTCACHE_SIZE; nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Destructor, private as well nuclear@0: ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() nuclear@0: { nuclear@0: // nothing to do here nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Returns whether the processing step is present in the given flag field. nuclear@0: bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const nuclear@0: { nuclear@0: return (pFlags & aiProcess_ImproveCacheLocality) != 0; nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Setup configuration nuclear@0: void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) nuclear@0: { nuclear@0: // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer nuclear@0: configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE); nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Executes the post processing step on the given imported data. nuclear@0: void ImproveCacheLocalityProcess::Execute( aiScene* pScene) nuclear@0: { nuclear@0: if (!pScene->mNumMeshes) { nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes"); nuclear@0: return; nuclear@0: } nuclear@0: nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin"); nuclear@0: nuclear@0: float out = 0.f; nuclear@0: unsigned int numf = 0, numm = 0; nuclear@0: for( unsigned int a = 0; a < pScene->mNumMeshes; a++){ nuclear@0: const float res = ProcessMesh( pScene->mMeshes[a],a); nuclear@0: if (res) { nuclear@0: numf += pScene->mMeshes[a]->mNumFaces; nuclear@0: out += res; nuclear@0: ++numm; nuclear@0: } nuclear@0: } nuclear@0: if (!DefaultLogger::isNullLogger()) { nuclear@0: char szBuff[128]; // should be sufficiently large in every case nuclear@0: ::sprintf(szBuff,"Cache relevant are %i meshes (%i faces). Average output ACMR is %f", nuclear@0: numm,numf,out/numf); nuclear@0: nuclear@0: DefaultLogger::get()->info(szBuff); nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. "); nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // Improves the cache coherency of a specific mesh nuclear@0: float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) nuclear@0: { nuclear@0: // TODO: rewrite this to use std::vector or boost::shared_array nuclear@0: ai_assert(NULL != pMesh); nuclear@0: nuclear@0: // Check whether the input data is valid nuclear@0: // - there must be vertices and faces nuclear@0: // - all faces must be triangulated or we can't operate on them nuclear@0: if (!pMesh->HasFaces() || !pMesh->HasPositions()) nuclear@0: return 0.f; nuclear@0: nuclear@0: if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) { nuclear@0: DefaultLogger::get()->error("This algorithm works on triangle meshes only"); nuclear@0: return 0.f; nuclear@0: } nuclear@0: nuclear@0: if(pMesh->mNumVertices <= configCacheDepth) { nuclear@0: return 0.f; nuclear@0: } nuclear@0: nuclear@0: float fACMR = 3.f; nuclear@0: const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces; nuclear@0: nuclear@0: // Input ACMR is for logging purposes only nuclear@0: if (!DefaultLogger::isNullLogger()) { nuclear@0: nuclear@0: unsigned int* piFIFOStack = new unsigned int[configCacheDepth]; nuclear@0: memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int)); nuclear@0: unsigned int* piCur = piFIFOStack; nuclear@0: const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth; nuclear@0: nuclear@0: // count the number of cache misses nuclear@0: unsigned int iCacheMisses = 0; nuclear@0: for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) { nuclear@0: nuclear@0: for (unsigned int qq = 0; qq < 3;++qq) { nuclear@0: bool bInCache = false; nuclear@0: nuclear@0: for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) { nuclear@0: if (*pp == pcFace->mIndices[qq]) { nuclear@0: // the vertex is in cache nuclear@0: bInCache = true; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: if (!bInCache) { nuclear@0: ++iCacheMisses; nuclear@0: if (piCurEnd == piCur) { nuclear@0: piCur = piFIFOStack; nuclear@0: } nuclear@0: *piCur++ = pcFace->mIndices[qq]; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: delete[] piFIFOStack; nuclear@0: fACMR = (float)iCacheMisses / pMesh->mNumFaces; nuclear@0: if (3.0 == fACMR) { nuclear@0: char szBuff[128]; // should be sufficiently large in every case nuclear@0: nuclear@0: // the JoinIdenticalVertices process has not been executed on this nuclear@0: // mesh, otherwise this value would normally be at least minimally nuclear@0: // smaller than 3.0 ... nuclear@0: sprintf(szBuff,"Mesh %i: Not suitable for vcache optimization",meshNum); nuclear@0: DefaultLogger::get()->warn(szBuff); nuclear@0: return 0.f; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: // first we need to build a vertex-triangle adjacency list nuclear@0: VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true); nuclear@0: nuclear@0: // build a list to store per-vertex caching time stamps nuclear@0: unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices]; nuclear@0: memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int)); nuclear@0: nuclear@0: // allocate an empty output index buffer. We store the output indices in one large array. nuclear@0: // Since the number of triangles won't change the input faces can be reused. This is how nuclear@0: // we save thousands of redundant mini allocations for aiFace::mIndices nuclear@0: const unsigned int iIdxCnt = pMesh->mNumFaces*3; nuclear@0: unsigned int* const piIBOutput = new unsigned int[iIdxCnt]; nuclear@0: unsigned int* piCSIter = piIBOutput; nuclear@0: nuclear@0: // allocate the flag array to hold the information nuclear@0: // whether a face has already been emitted or not nuclear@0: std::vector abEmitted(pMesh->mNumFaces,false); nuclear@0: nuclear@0: // dead-end vertex index stack nuclear@0: std::stack > sDeadEndVStack; nuclear@0: nuclear@0: // create a copy of the piNumTriPtr buffer nuclear@0: unsigned int* const piNumTriPtr = adj.mLiveTriangles; nuclear@0: const std::vector piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices); nuclear@0: nuclear@0: // get the largest number of referenced triangles and allocate the "candidate buffer" nuclear@0: unsigned int iMaxRefTris = 0; { nuclear@0: const unsigned int* piCur = adj.mLiveTriangles; nuclear@0: const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices; nuclear@0: for (;piCur != piCurEnd;++piCur) { nuclear@0: iMaxRefTris = std::max(iMaxRefTris,*piCur); nuclear@0: } nuclear@0: } nuclear@0: unsigned int* piCandidates = new unsigned int[iMaxRefTris*3]; nuclear@0: unsigned int iCacheMisses = 0; nuclear@0: nuclear@0: // ................................................................................... nuclear@0: /** PSEUDOCODE for the algorithm nuclear@0: nuclear@0: A = Build-Adjacency(I) Vertex-triangle adjacency nuclear@0: L = Get-Triangle-Counts(A) Per-vertex live triangle counts nuclear@0: C = Zero(Vertex-Count(I)) Per-vertex caching time stamps nuclear@0: D = Empty-Stack() Dead-end vertex stack nuclear@0: E = False(Triangle-Count(I)) Per triangle emitted flag nuclear@0: O = Empty-Index-Buffer() Empty output buffer nuclear@0: f = 0 Arbitrary starting vertex nuclear@0: s = k+1, i = 1 Time stamp and cursor nuclear@0: while f >= 0 For all valid fanning vertices nuclear@0: N = Empty-Set() 1-ring of next candidates nuclear@0: for each Triangle t in Neighbors(A, f) nuclear@0: if !Emitted(E,t) nuclear@0: for each Vertex v in t nuclear@0: Append(O,v) Output vertex nuclear@0: Push(D,v) Add to dead-end stack nuclear@0: Insert(N,v) Register as candidate nuclear@0: L[v] = L[v]-1 Decrease live triangle count nuclear@0: if s-C[v] > k If not in cache nuclear@0: C[v] = s Set time stamp nuclear@0: s = s+1 Increment time stamp nuclear@0: E[t] = true Flag triangle as emitted nuclear@0: Select next fanning vertex nuclear@0: f = Get-Next-Vertex(I,i,k,N,C,s,L,D) nuclear@0: return O nuclear@0: */ nuclear@0: // ................................................................................... nuclear@0: nuclear@0: int ivdx = 0; nuclear@0: int ics = 1; nuclear@0: int iStampCnt = configCacheDepth+1; nuclear@0: while (ivdx >= 0) { nuclear@0: nuclear@0: unsigned int icnt = piNumTriPtrNoModify[ivdx]; nuclear@0: unsigned int* piList = adj.GetAdjacentTriangles(ivdx); nuclear@0: unsigned int* piCurCandidate = piCandidates; nuclear@0: nuclear@0: // get all triangles in the neighborhood nuclear@0: for (unsigned int tri = 0; tri < icnt;++tri) { nuclear@0: nuclear@0: // if they have not yet been emitted, add them to the output IB nuclear@0: const unsigned int fidx = *piList++; nuclear@0: if (!abEmitted[fidx]) { nuclear@0: nuclear@0: // so iterate through all vertices of the current triangle nuclear@0: const aiFace* pcFace = &pMesh->mFaces[ fidx ]; nuclear@0: for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) { nuclear@0: const unsigned int dp = *p; nuclear@0: nuclear@0: // the current vertex won't have any free triangles after this step nuclear@0: if (ivdx != (int)dp) { nuclear@0: // append the vertex to the dead-end stack nuclear@0: sDeadEndVStack.push(dp); nuclear@0: nuclear@0: // register as candidate for the next step nuclear@0: *piCurCandidate++ = dp; nuclear@0: nuclear@0: // decrease the per-vertex triangle counts nuclear@0: piNumTriPtr[dp]--; nuclear@0: } nuclear@0: nuclear@0: // append the vertex to the output index buffer nuclear@0: *piCSIter++ = dp; nuclear@0: nuclear@0: // if the vertex is not yet in cache, set its cache count nuclear@0: if (iStampCnt-piCachingStamps[dp] > configCacheDepth) { nuclear@0: piCachingStamps[dp] = iStampCnt++; nuclear@0: ++iCacheMisses; nuclear@0: } nuclear@0: } nuclear@0: // flag triangle as emitted nuclear@0: abEmitted[fidx] = true; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: // the vertex has now no living adjacent triangles anymore nuclear@0: piNumTriPtr[ivdx] = 0; nuclear@0: nuclear@0: // get next fanning vertex nuclear@0: ivdx = -1; nuclear@0: int max_priority = -1; nuclear@0: for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) { nuclear@0: register const unsigned int dp = *piCur; nuclear@0: nuclear@0: // must have live triangles nuclear@0: if (piNumTriPtr[dp] > 0) { nuclear@0: int priority = 0; nuclear@0: nuclear@0: // will the vertex be in cache, even after fanning occurs? nuclear@0: unsigned int tmp; nuclear@0: if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) { nuclear@0: priority = tmp; nuclear@0: } nuclear@0: nuclear@0: // keep best candidate nuclear@0: if (priority > max_priority) { nuclear@0: max_priority = priority; nuclear@0: ivdx = dp; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: // did we reach a dead end? nuclear@0: if (-1 == ivdx) { nuclear@0: // need to get a non-local vertex for which we have a good chance that it is still nuclear@0: // in the cache ... nuclear@0: while (!sDeadEndVStack.empty()) { nuclear@0: unsigned int iCachedIdx = sDeadEndVStack.top(); nuclear@0: sDeadEndVStack.pop(); nuclear@0: if (piNumTriPtr[ iCachedIdx ] > 0) { nuclear@0: ivdx = iCachedIdx; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if (-1 == ivdx) { nuclear@0: // well, there isn't such a vertex. Simply get the next vertex in input order and nuclear@0: // hope it is not too bad ... nuclear@0: while (ics < (int)pMesh->mNumVertices) { nuclear@0: ++ics; nuclear@0: if (piNumTriPtr[ics] > 0) { nuclear@0: ivdx = ics; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: float fACMR2 = 0.0f; nuclear@0: if (!DefaultLogger::isNullLogger()) { nuclear@0: fACMR2 = (float)iCacheMisses / pMesh->mNumFaces; nuclear@0: nuclear@0: // very intense verbose logging ... prepare for much text if there are many meshes nuclear@0: if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { nuclear@0: char szBuff[128]; // should be sufficiently large in every case nuclear@0: nuclear@0: ::sprintf(szBuff,"Mesh %i | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2, nuclear@0: ((fACMR - fACMR2) / fACMR) * 100.f); nuclear@0: DefaultLogger::get()->debug(szBuff); nuclear@0: } nuclear@0: nuclear@0: fACMR2 *= pMesh->mNumFaces; nuclear@0: } nuclear@0: // sort the output index buffer back to the input array nuclear@0: piCSIter = piIBOutput; nuclear@0: for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) { nuclear@0: pcFace->mIndices[0] = *piCSIter++; nuclear@0: pcFace->mIndices[1] = *piCSIter++; nuclear@0: pcFace->mIndices[2] = *piCSIter++; nuclear@0: } nuclear@0: nuclear@0: // delete temporary storage nuclear@0: delete[] piCachingStamps; nuclear@0: delete[] piIBOutput; nuclear@0: delete[] piCandidates; nuclear@0: nuclear@0: return fACMR2; nuclear@0: }