nuclear@0: /*
nuclear@0: ---------------------------------------------------------------------------
nuclear@0: Open Asset Import Library (assimp)
nuclear@0: ---------------------------------------------------------------------------
nuclear@0:
nuclear@0: Copyright (c) 2006-2012, assimp team
nuclear@0:
nuclear@0: All rights reserved.
nuclear@0:
nuclear@0: Redistribution and use of this software in source and binary forms,
nuclear@0: with or without modification, are permitted provided that the following
nuclear@0: conditions are met:
nuclear@0:
nuclear@0: * Redistributions of source code must retain the above
nuclear@0: copyright notice, this list of conditions and the
nuclear@0: following disclaimer.
nuclear@0:
nuclear@0: * Redistributions in binary form must reproduce the above
nuclear@0: copyright notice, this list of conditions and the
nuclear@0: following disclaimer in the documentation and/or other
nuclear@0: materials provided with the distribution.
nuclear@0:
nuclear@0: * Neither the name of the assimp team, nor the names of its
nuclear@0: contributors may be used to endorse or promote products
nuclear@0: derived from this software without specific prior
nuclear@0: written permission of the assimp team.
nuclear@0:
nuclear@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
nuclear@0: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
nuclear@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
nuclear@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
nuclear@0: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
nuclear@0: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
nuclear@0: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
nuclear@0: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
nuclear@0: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
nuclear@0: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
nuclear@0: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
nuclear@0: ---------------------------------------------------------------------------
nuclear@0: */
nuclear@0:
nuclear@0: /** @file Implementation of the post processing step to improve the cache locality of a mesh.
nuclear@0: *
nuclear@0: * The algorithm is roughly basing on this paper:
nuclear@0: * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
nuclear@0: * .. although overdraw rduction isn't implemented yet ...
nuclear@0: */
nuclear@0:
nuclear@0: #include "AssimpPCH.h"
nuclear@0:
nuclear@0: // internal headers
nuclear@0: #include "ImproveCacheLocality.h"
nuclear@0: #include "VertexTriangleAdjacency.h"
nuclear@0:
nuclear@0: using namespace Assimp;
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Constructor to be privately used by Importer
nuclear@0: ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
nuclear@0: configCacheDepth = PP_ICL_PTCACHE_SIZE;
nuclear@0: }
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Destructor, private as well
nuclear@0: ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
nuclear@0: {
nuclear@0: // nothing to do here
nuclear@0: }
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Returns whether the processing step is present in the given flag field.
nuclear@0: bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
nuclear@0: {
nuclear@0: return (pFlags & aiProcess_ImproveCacheLocality) != 0;
nuclear@0: }
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Setup configuration
nuclear@0: void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
nuclear@0: {
nuclear@0: // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
nuclear@0: configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
nuclear@0: }
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Executes the post processing step on the given imported data.
nuclear@0: void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
nuclear@0: {
nuclear@0: if (!pScene->mNumMeshes) {
nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
nuclear@0: return;
nuclear@0: }
nuclear@0:
nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
nuclear@0:
nuclear@0: float out = 0.f;
nuclear@0: unsigned int numf = 0, numm = 0;
nuclear@0: for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
nuclear@0: const float res = ProcessMesh( pScene->mMeshes[a],a);
nuclear@0: if (res) {
nuclear@0: numf += pScene->mMeshes[a]->mNumFaces;
nuclear@0: out += res;
nuclear@0: ++numm;
nuclear@0: }
nuclear@0: }
nuclear@0: if (!DefaultLogger::isNullLogger()) {
nuclear@0: char szBuff[128]; // should be sufficiently large in every case
nuclear@0: ::sprintf(szBuff,"Cache relevant are %i meshes (%i faces). Average output ACMR is %f",
nuclear@0: numm,numf,out/numf);
nuclear@0:
nuclear@0: DefaultLogger::get()->info(szBuff);
nuclear@0: DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
nuclear@0: }
nuclear@0: }
nuclear@0:
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Improves the cache coherency of a specific mesh
nuclear@0: float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
nuclear@0: {
nuclear@0: // TODO: rewrite this to use std::vector or boost::shared_array
nuclear@0: ai_assert(NULL != pMesh);
nuclear@0:
nuclear@0: // Check whether the input data is valid
nuclear@0: // - there must be vertices and faces
nuclear@0: // - all faces must be triangulated or we can't operate on them
nuclear@0: if (!pMesh->HasFaces() || !pMesh->HasPositions())
nuclear@0: return 0.f;
nuclear@0:
nuclear@0: if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
nuclear@0: DefaultLogger::get()->error("This algorithm works on triangle meshes only");
nuclear@0: return 0.f;
nuclear@0: }
nuclear@0:
nuclear@0: if(pMesh->mNumVertices <= configCacheDepth) {
nuclear@0: return 0.f;
nuclear@0: }
nuclear@0:
nuclear@0: float fACMR = 3.f;
nuclear@0: const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
nuclear@0:
nuclear@0: // Input ACMR is for logging purposes only
nuclear@0: if (!DefaultLogger::isNullLogger()) {
nuclear@0:
nuclear@0: unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
nuclear@0: memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
nuclear@0: unsigned int* piCur = piFIFOStack;
nuclear@0: const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
nuclear@0:
nuclear@0: // count the number of cache misses
nuclear@0: unsigned int iCacheMisses = 0;
nuclear@0: for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
nuclear@0:
nuclear@0: for (unsigned int qq = 0; qq < 3;++qq) {
nuclear@0: bool bInCache = false;
nuclear@0:
nuclear@0: for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
nuclear@0: if (*pp == pcFace->mIndices[qq]) {
nuclear@0: // the vertex is in cache
nuclear@0: bInCache = true;
nuclear@0: break;
nuclear@0: }
nuclear@0: }
nuclear@0: if (!bInCache) {
nuclear@0: ++iCacheMisses;
nuclear@0: if (piCurEnd == piCur) {
nuclear@0: piCur = piFIFOStack;
nuclear@0: }
nuclear@0: *piCur++ = pcFace->mIndices[qq];
nuclear@0: }
nuclear@0: }
nuclear@0: }
nuclear@0: delete[] piFIFOStack;
nuclear@0: fACMR = (float)iCacheMisses / pMesh->mNumFaces;
nuclear@0: if (3.0 == fACMR) {
nuclear@0: char szBuff[128]; // should be sufficiently large in every case
nuclear@0:
nuclear@0: // the JoinIdenticalVertices process has not been executed on this
nuclear@0: // mesh, otherwise this value would normally be at least minimally
nuclear@0: // smaller than 3.0 ...
nuclear@0: sprintf(szBuff,"Mesh %i: Not suitable for vcache optimization",meshNum);
nuclear@0: DefaultLogger::get()->warn(szBuff);
nuclear@0: return 0.f;
nuclear@0: }
nuclear@0: }
nuclear@0:
nuclear@0: // first we need to build a vertex-triangle adjacency list
nuclear@0: VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
nuclear@0:
nuclear@0: // build a list to store per-vertex caching time stamps
nuclear@0: unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
nuclear@0: memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
nuclear@0:
nuclear@0: // allocate an empty output index buffer. We store the output indices in one large array.
nuclear@0: // Since the number of triangles won't change the input faces can be reused. This is how
nuclear@0: // we save thousands of redundant mini allocations for aiFace::mIndices
nuclear@0: const unsigned int iIdxCnt = pMesh->mNumFaces*3;
nuclear@0: unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
nuclear@0: unsigned int* piCSIter = piIBOutput;
nuclear@0:
nuclear@0: // allocate the flag array to hold the information
nuclear@0: // whether a face has already been emitted or not
nuclear@0: std::vector abEmitted(pMesh->mNumFaces,false);
nuclear@0:
nuclear@0: // dead-end vertex index stack
nuclear@0: std::stack > sDeadEndVStack;
nuclear@0:
nuclear@0: // create a copy of the piNumTriPtr buffer
nuclear@0: unsigned int* const piNumTriPtr = adj.mLiveTriangles;
nuclear@0: const std::vector piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
nuclear@0:
nuclear@0: // get the largest number of referenced triangles and allocate the "candidate buffer"
nuclear@0: unsigned int iMaxRefTris = 0; {
nuclear@0: const unsigned int* piCur = adj.mLiveTriangles;
nuclear@0: const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
nuclear@0: for (;piCur != piCurEnd;++piCur) {
nuclear@0: iMaxRefTris = std::max(iMaxRefTris,*piCur);
nuclear@0: }
nuclear@0: }
nuclear@0: unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
nuclear@0: unsigned int iCacheMisses = 0;
nuclear@0:
nuclear@0: // ...................................................................................
nuclear@0: /** PSEUDOCODE for the algorithm
nuclear@0:
nuclear@0: A = Build-Adjacency(I) Vertex-triangle adjacency
nuclear@0: L = Get-Triangle-Counts(A) Per-vertex live triangle counts
nuclear@0: C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
nuclear@0: D = Empty-Stack() Dead-end vertex stack
nuclear@0: E = False(Triangle-Count(I)) Per triangle emitted flag
nuclear@0: O = Empty-Index-Buffer() Empty output buffer
nuclear@0: f = 0 Arbitrary starting vertex
nuclear@0: s = k+1, i = 1 Time stamp and cursor
nuclear@0: while f >= 0 For all valid fanning vertices
nuclear@0: N = Empty-Set() 1-ring of next candidates
nuclear@0: for each Triangle t in Neighbors(A, f)
nuclear@0: if !Emitted(E,t)
nuclear@0: for each Vertex v in t
nuclear@0: Append(O,v) Output vertex
nuclear@0: Push(D,v) Add to dead-end stack
nuclear@0: Insert(N,v) Register as candidate
nuclear@0: L[v] = L[v]-1 Decrease live triangle count
nuclear@0: if s-C[v] > k If not in cache
nuclear@0: C[v] = s Set time stamp
nuclear@0: s = s+1 Increment time stamp
nuclear@0: E[t] = true Flag triangle as emitted
nuclear@0: Select next fanning vertex
nuclear@0: f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
nuclear@0: return O
nuclear@0: */
nuclear@0: // ...................................................................................
nuclear@0:
nuclear@0: int ivdx = 0;
nuclear@0: int ics = 1;
nuclear@0: int iStampCnt = configCacheDepth+1;
nuclear@0: while (ivdx >= 0) {
nuclear@0:
nuclear@0: unsigned int icnt = piNumTriPtrNoModify[ivdx];
nuclear@0: unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
nuclear@0: unsigned int* piCurCandidate = piCandidates;
nuclear@0:
nuclear@0: // get all triangles in the neighborhood
nuclear@0: for (unsigned int tri = 0; tri < icnt;++tri) {
nuclear@0:
nuclear@0: // if they have not yet been emitted, add them to the output IB
nuclear@0: const unsigned int fidx = *piList++;
nuclear@0: if (!abEmitted[fidx]) {
nuclear@0:
nuclear@0: // so iterate through all vertices of the current triangle
nuclear@0: const aiFace* pcFace = &pMesh->mFaces[ fidx ];
nuclear@0: for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) {
nuclear@0: const unsigned int dp = *p;
nuclear@0:
nuclear@0: // the current vertex won't have any free triangles after this step
nuclear@0: if (ivdx != (int)dp) {
nuclear@0: // append the vertex to the dead-end stack
nuclear@0: sDeadEndVStack.push(dp);
nuclear@0:
nuclear@0: // register as candidate for the next step
nuclear@0: *piCurCandidate++ = dp;
nuclear@0:
nuclear@0: // decrease the per-vertex triangle counts
nuclear@0: piNumTriPtr[dp]--;
nuclear@0: }
nuclear@0:
nuclear@0: // append the vertex to the output index buffer
nuclear@0: *piCSIter++ = dp;
nuclear@0:
nuclear@0: // if the vertex is not yet in cache, set its cache count
nuclear@0: if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
nuclear@0: piCachingStamps[dp] = iStampCnt++;
nuclear@0: ++iCacheMisses;
nuclear@0: }
nuclear@0: }
nuclear@0: // flag triangle as emitted
nuclear@0: abEmitted[fidx] = true;
nuclear@0: }
nuclear@0: }
nuclear@0:
nuclear@0: // the vertex has now no living adjacent triangles anymore
nuclear@0: piNumTriPtr[ivdx] = 0;
nuclear@0:
nuclear@0: // get next fanning vertex
nuclear@0: ivdx = -1;
nuclear@0: int max_priority = -1;
nuclear@0: for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
nuclear@0: register const unsigned int dp = *piCur;
nuclear@0:
nuclear@0: // must have live triangles
nuclear@0: if (piNumTriPtr[dp] > 0) {
nuclear@0: int priority = 0;
nuclear@0:
nuclear@0: // will the vertex be in cache, even after fanning occurs?
nuclear@0: unsigned int tmp;
nuclear@0: if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
nuclear@0: priority = tmp;
nuclear@0: }
nuclear@0:
nuclear@0: // keep best candidate
nuclear@0: if (priority > max_priority) {
nuclear@0: max_priority = priority;
nuclear@0: ivdx = dp;
nuclear@0: }
nuclear@0: }
nuclear@0: }
nuclear@0: // did we reach a dead end?
nuclear@0: if (-1 == ivdx) {
nuclear@0: // need to get a non-local vertex for which we have a good chance that it is still
nuclear@0: // in the cache ...
nuclear@0: while (!sDeadEndVStack.empty()) {
nuclear@0: unsigned int iCachedIdx = sDeadEndVStack.top();
nuclear@0: sDeadEndVStack.pop();
nuclear@0: if (piNumTriPtr[ iCachedIdx ] > 0) {
nuclear@0: ivdx = iCachedIdx;
nuclear@0: break;
nuclear@0: }
nuclear@0: }
nuclear@0:
nuclear@0: if (-1 == ivdx) {
nuclear@0: // well, there isn't such a vertex. Simply get the next vertex in input order and
nuclear@0: // hope it is not too bad ...
nuclear@0: while (ics < (int)pMesh->mNumVertices) {
nuclear@0: ++ics;
nuclear@0: if (piNumTriPtr[ics] > 0) {
nuclear@0: ivdx = ics;
nuclear@0: break;
nuclear@0: }
nuclear@0: }
nuclear@0: }
nuclear@0: }
nuclear@0: }
nuclear@0: float fACMR2 = 0.0f;
nuclear@0: if (!DefaultLogger::isNullLogger()) {
nuclear@0: fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
nuclear@0:
nuclear@0: // very intense verbose logging ... prepare for much text if there are many meshes
nuclear@0: if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
nuclear@0: char szBuff[128]; // should be sufficiently large in every case
nuclear@0:
nuclear@0: ::sprintf(szBuff,"Mesh %i | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
nuclear@0: ((fACMR - fACMR2) / fACMR) * 100.f);
nuclear@0: DefaultLogger::get()->debug(szBuff);
nuclear@0: }
nuclear@0:
nuclear@0: fACMR2 *= pMesh->mNumFaces;
nuclear@0: }
nuclear@0: // sort the output index buffer back to the input array
nuclear@0: piCSIter = piIBOutput;
nuclear@0: for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
nuclear@0: pcFace->mIndices[0] = *piCSIter++;
nuclear@0: pcFace->mIndices[1] = *piCSIter++;
nuclear@0: pcFace->mIndices[2] = *piCSIter++;
nuclear@0: }
nuclear@0:
nuclear@0: // delete temporary storage
nuclear@0: delete[] piCachingStamps;
nuclear@0: delete[] piIBOutput;
nuclear@0: delete[] piCandidates;
nuclear@0:
nuclear@0: return fACMR2;
nuclear@0: }