vrshoot

annotate libs/assimp/ImproveCacheLocality.cpp @ 1:e7ca128b8713

looks nice :)
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 02 Feb 2014 00:35:22 +0200
parents
children
rev   line source
nuclear@0 1 /*
nuclear@0 2 ---------------------------------------------------------------------------
nuclear@0 3 Open Asset Import Library (assimp)
nuclear@0 4 ---------------------------------------------------------------------------
nuclear@0 5
nuclear@0 6 Copyright (c) 2006-2012, assimp team
nuclear@0 7
nuclear@0 8 All rights reserved.
nuclear@0 9
nuclear@0 10 Redistribution and use of this software in source and binary forms,
nuclear@0 11 with or without modification, are permitted provided that the following
nuclear@0 12 conditions are met:
nuclear@0 13
nuclear@0 14 * Redistributions of source code must retain the above
nuclear@0 15 copyright notice, this list of conditions and the
nuclear@0 16 following disclaimer.
nuclear@0 17
nuclear@0 18 * Redistributions in binary form must reproduce the above
nuclear@0 19 copyright notice, this list of conditions and the
nuclear@0 20 following disclaimer in the documentation and/or other
nuclear@0 21 materials provided with the distribution.
nuclear@0 22
nuclear@0 23 * Neither the name of the assimp team, nor the names of its
nuclear@0 24 contributors may be used to endorse or promote products
nuclear@0 25 derived from this software without specific prior
nuclear@0 26 written permission of the assimp team.
nuclear@0 27
nuclear@0 28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
nuclear@0 29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
nuclear@0 30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
nuclear@0 31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
nuclear@0 32 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
nuclear@0 33 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
nuclear@0 34 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
nuclear@0 35 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
nuclear@0 36 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
nuclear@0 37 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
nuclear@0 38 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
nuclear@0 39 ---------------------------------------------------------------------------
nuclear@0 40 */
nuclear@0 41
nuclear@0 42 /** @file Implementation of the post processing step to improve the cache locality of a mesh.
nuclear@0 43 * <br>
nuclear@0 44 * The algorithm is roughly basing on this paper:
nuclear@0 45 * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
nuclear@0 46 * .. although overdraw rduction isn't implemented yet ...
nuclear@0 47 */
nuclear@0 48
nuclear@0 49 #include "AssimpPCH.h"
nuclear@0 50
nuclear@0 51 // internal headers
nuclear@0 52 #include "ImproveCacheLocality.h"
nuclear@0 53 #include "VertexTriangleAdjacency.h"
nuclear@0 54
nuclear@0 55 using namespace Assimp;
nuclear@0 56
nuclear@0 57 // ------------------------------------------------------------------------------------------------
nuclear@0 58 // Constructor to be privately used by Importer
nuclear@0 59 ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
nuclear@0 60 configCacheDepth = PP_ICL_PTCACHE_SIZE;
nuclear@0 61 }
nuclear@0 62
nuclear@0 63 // ------------------------------------------------------------------------------------------------
nuclear@0 64 // Destructor, private as well
nuclear@0 65 ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
nuclear@0 66 {
nuclear@0 67 // nothing to do here
nuclear@0 68 }
nuclear@0 69
nuclear@0 70 // ------------------------------------------------------------------------------------------------
nuclear@0 71 // Returns whether the processing step is present in the given flag field.
nuclear@0 72 bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
nuclear@0 73 {
nuclear@0 74 return (pFlags & aiProcess_ImproveCacheLocality) != 0;
nuclear@0 75 }
nuclear@0 76
nuclear@0 77 // ------------------------------------------------------------------------------------------------
nuclear@0 78 // Setup configuration
nuclear@0 79 void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
nuclear@0 80 {
nuclear@0 81 // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
nuclear@0 82 configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
nuclear@0 83 }
nuclear@0 84
nuclear@0 85 // ------------------------------------------------------------------------------------------------
nuclear@0 86 // Executes the post processing step on the given imported data.
nuclear@0 87 void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
nuclear@0 88 {
nuclear@0 89 if (!pScene->mNumMeshes) {
nuclear@0 90 DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
nuclear@0 91 return;
nuclear@0 92 }
nuclear@0 93
nuclear@0 94 DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
nuclear@0 95
nuclear@0 96 float out = 0.f;
nuclear@0 97 unsigned int numf = 0, numm = 0;
nuclear@0 98 for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
nuclear@0 99 const float res = ProcessMesh( pScene->mMeshes[a],a);
nuclear@0 100 if (res) {
nuclear@0 101 numf += pScene->mMeshes[a]->mNumFaces;
nuclear@0 102 out += res;
nuclear@0 103 ++numm;
nuclear@0 104 }
nuclear@0 105 }
nuclear@0 106 if (!DefaultLogger::isNullLogger()) {
nuclear@0 107 char szBuff[128]; // should be sufficiently large in every case
nuclear@0 108 ::sprintf(szBuff,"Cache relevant are %i meshes (%i faces). Average output ACMR is %f",
nuclear@0 109 numm,numf,out/numf);
nuclear@0 110
nuclear@0 111 DefaultLogger::get()->info(szBuff);
nuclear@0 112 DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
nuclear@0 113 }
nuclear@0 114 }
nuclear@0 115
nuclear@0 116 // ------------------------------------------------------------------------------------------------
nuclear@0 117 // Improves the cache coherency of a specific mesh
nuclear@0 118 float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
nuclear@0 119 {
nuclear@0 120 // TODO: rewrite this to use std::vector or boost::shared_array
nuclear@0 121 ai_assert(NULL != pMesh);
nuclear@0 122
nuclear@0 123 // Check whether the input data is valid
nuclear@0 124 // - there must be vertices and faces
nuclear@0 125 // - all faces must be triangulated or we can't operate on them
nuclear@0 126 if (!pMesh->HasFaces() || !pMesh->HasPositions())
nuclear@0 127 return 0.f;
nuclear@0 128
nuclear@0 129 if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
nuclear@0 130 DefaultLogger::get()->error("This algorithm works on triangle meshes only");
nuclear@0 131 return 0.f;
nuclear@0 132 }
nuclear@0 133
nuclear@0 134 if(pMesh->mNumVertices <= configCacheDepth) {
nuclear@0 135 return 0.f;
nuclear@0 136 }
nuclear@0 137
nuclear@0 138 float fACMR = 3.f;
nuclear@0 139 const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
nuclear@0 140
nuclear@0 141 // Input ACMR is for logging purposes only
nuclear@0 142 if (!DefaultLogger::isNullLogger()) {
nuclear@0 143
nuclear@0 144 unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
nuclear@0 145 memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
nuclear@0 146 unsigned int* piCur = piFIFOStack;
nuclear@0 147 const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
nuclear@0 148
nuclear@0 149 // count the number of cache misses
nuclear@0 150 unsigned int iCacheMisses = 0;
nuclear@0 151 for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
nuclear@0 152
nuclear@0 153 for (unsigned int qq = 0; qq < 3;++qq) {
nuclear@0 154 bool bInCache = false;
nuclear@0 155
nuclear@0 156 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
nuclear@0 157 if (*pp == pcFace->mIndices[qq]) {
nuclear@0 158 // the vertex is in cache
nuclear@0 159 bInCache = true;
nuclear@0 160 break;
nuclear@0 161 }
nuclear@0 162 }
nuclear@0 163 if (!bInCache) {
nuclear@0 164 ++iCacheMisses;
nuclear@0 165 if (piCurEnd == piCur) {
nuclear@0 166 piCur = piFIFOStack;
nuclear@0 167 }
nuclear@0 168 *piCur++ = pcFace->mIndices[qq];
nuclear@0 169 }
nuclear@0 170 }
nuclear@0 171 }
nuclear@0 172 delete[] piFIFOStack;
nuclear@0 173 fACMR = (float)iCacheMisses / pMesh->mNumFaces;
nuclear@0 174 if (3.0 == fACMR) {
nuclear@0 175 char szBuff[128]; // should be sufficiently large in every case
nuclear@0 176
nuclear@0 177 // the JoinIdenticalVertices process has not been executed on this
nuclear@0 178 // mesh, otherwise this value would normally be at least minimally
nuclear@0 179 // smaller than 3.0 ...
nuclear@0 180 sprintf(szBuff,"Mesh %i: Not suitable for vcache optimization",meshNum);
nuclear@0 181 DefaultLogger::get()->warn(szBuff);
nuclear@0 182 return 0.f;
nuclear@0 183 }
nuclear@0 184 }
nuclear@0 185
nuclear@0 186 // first we need to build a vertex-triangle adjacency list
nuclear@0 187 VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
nuclear@0 188
nuclear@0 189 // build a list to store per-vertex caching time stamps
nuclear@0 190 unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
nuclear@0 191 memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
nuclear@0 192
nuclear@0 193 // allocate an empty output index buffer. We store the output indices in one large array.
nuclear@0 194 // Since the number of triangles won't change the input faces can be reused. This is how
nuclear@0 195 // we save thousands of redundant mini allocations for aiFace::mIndices
nuclear@0 196 const unsigned int iIdxCnt = pMesh->mNumFaces*3;
nuclear@0 197 unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
nuclear@0 198 unsigned int* piCSIter = piIBOutput;
nuclear@0 199
nuclear@0 200 // allocate the flag array to hold the information
nuclear@0 201 // whether a face has already been emitted or not
nuclear@0 202 std::vector<bool> abEmitted(pMesh->mNumFaces,false);
nuclear@0 203
nuclear@0 204 // dead-end vertex index stack
nuclear@0 205 std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
nuclear@0 206
nuclear@0 207 // create a copy of the piNumTriPtr buffer
nuclear@0 208 unsigned int* const piNumTriPtr = adj.mLiveTriangles;
nuclear@0 209 const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
nuclear@0 210
nuclear@0 211 // get the largest number of referenced triangles and allocate the "candidate buffer"
nuclear@0 212 unsigned int iMaxRefTris = 0; {
nuclear@0 213 const unsigned int* piCur = adj.mLiveTriangles;
nuclear@0 214 const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
nuclear@0 215 for (;piCur != piCurEnd;++piCur) {
nuclear@0 216 iMaxRefTris = std::max(iMaxRefTris,*piCur);
nuclear@0 217 }
nuclear@0 218 }
nuclear@0 219 unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
nuclear@0 220 unsigned int iCacheMisses = 0;
nuclear@0 221
nuclear@0 222 // ...................................................................................
nuclear@0 223 /** PSEUDOCODE for the algorithm
nuclear@0 224
nuclear@0 225 A = Build-Adjacency(I) Vertex-triangle adjacency
nuclear@0 226 L = Get-Triangle-Counts(A) Per-vertex live triangle counts
nuclear@0 227 C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
nuclear@0 228 D = Empty-Stack() Dead-end vertex stack
nuclear@0 229 E = False(Triangle-Count(I)) Per triangle emitted flag
nuclear@0 230 O = Empty-Index-Buffer() Empty output buffer
nuclear@0 231 f = 0 Arbitrary starting vertex
nuclear@0 232 s = k+1, i = 1 Time stamp and cursor
nuclear@0 233 while f >= 0 For all valid fanning vertices
nuclear@0 234 N = Empty-Set() 1-ring of next candidates
nuclear@0 235 for each Triangle t in Neighbors(A, f)
nuclear@0 236 if !Emitted(E,t)
nuclear@0 237 for each Vertex v in t
nuclear@0 238 Append(O,v) Output vertex
nuclear@0 239 Push(D,v) Add to dead-end stack
nuclear@0 240 Insert(N,v) Register as candidate
nuclear@0 241 L[v] = L[v]-1 Decrease live triangle count
nuclear@0 242 if s-C[v] > k If not in cache
nuclear@0 243 C[v] = s Set time stamp
nuclear@0 244 s = s+1 Increment time stamp
nuclear@0 245 E[t] = true Flag triangle as emitted
nuclear@0 246 Select next fanning vertex
nuclear@0 247 f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
nuclear@0 248 return O
nuclear@0 249 */
nuclear@0 250 // ...................................................................................
nuclear@0 251
nuclear@0 252 int ivdx = 0;
nuclear@0 253 int ics = 1;
nuclear@0 254 int iStampCnt = configCacheDepth+1;
nuclear@0 255 while (ivdx >= 0) {
nuclear@0 256
nuclear@0 257 unsigned int icnt = piNumTriPtrNoModify[ivdx];
nuclear@0 258 unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
nuclear@0 259 unsigned int* piCurCandidate = piCandidates;
nuclear@0 260
nuclear@0 261 // get all triangles in the neighborhood
nuclear@0 262 for (unsigned int tri = 0; tri < icnt;++tri) {
nuclear@0 263
nuclear@0 264 // if they have not yet been emitted, add them to the output IB
nuclear@0 265 const unsigned int fidx = *piList++;
nuclear@0 266 if (!abEmitted[fidx]) {
nuclear@0 267
nuclear@0 268 // so iterate through all vertices of the current triangle
nuclear@0 269 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
nuclear@0 270 for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) {
nuclear@0 271 const unsigned int dp = *p;
nuclear@0 272
nuclear@0 273 // the current vertex won't have any free triangles after this step
nuclear@0 274 if (ivdx != (int)dp) {
nuclear@0 275 // append the vertex to the dead-end stack
nuclear@0 276 sDeadEndVStack.push(dp);
nuclear@0 277
nuclear@0 278 // register as candidate for the next step
nuclear@0 279 *piCurCandidate++ = dp;
nuclear@0 280
nuclear@0 281 // decrease the per-vertex triangle counts
nuclear@0 282 piNumTriPtr[dp]--;
nuclear@0 283 }
nuclear@0 284
nuclear@0 285 // append the vertex to the output index buffer
nuclear@0 286 *piCSIter++ = dp;
nuclear@0 287
nuclear@0 288 // if the vertex is not yet in cache, set its cache count
nuclear@0 289 if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
nuclear@0 290 piCachingStamps[dp] = iStampCnt++;
nuclear@0 291 ++iCacheMisses;
nuclear@0 292 }
nuclear@0 293 }
nuclear@0 294 // flag triangle as emitted
nuclear@0 295 abEmitted[fidx] = true;
nuclear@0 296 }
nuclear@0 297 }
nuclear@0 298
nuclear@0 299 // the vertex has now no living adjacent triangles anymore
nuclear@0 300 piNumTriPtr[ivdx] = 0;
nuclear@0 301
nuclear@0 302 // get next fanning vertex
nuclear@0 303 ivdx = -1;
nuclear@0 304 int max_priority = -1;
nuclear@0 305 for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
nuclear@0 306 register const unsigned int dp = *piCur;
nuclear@0 307
nuclear@0 308 // must have live triangles
nuclear@0 309 if (piNumTriPtr[dp] > 0) {
nuclear@0 310 int priority = 0;
nuclear@0 311
nuclear@0 312 // will the vertex be in cache, even after fanning occurs?
nuclear@0 313 unsigned int tmp;
nuclear@0 314 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
nuclear@0 315 priority = tmp;
nuclear@0 316 }
nuclear@0 317
nuclear@0 318 // keep best candidate
nuclear@0 319 if (priority > max_priority) {
nuclear@0 320 max_priority = priority;
nuclear@0 321 ivdx = dp;
nuclear@0 322 }
nuclear@0 323 }
nuclear@0 324 }
nuclear@0 325 // did we reach a dead end?
nuclear@0 326 if (-1 == ivdx) {
nuclear@0 327 // need to get a non-local vertex for which we have a good chance that it is still
nuclear@0 328 // in the cache ...
nuclear@0 329 while (!sDeadEndVStack.empty()) {
nuclear@0 330 unsigned int iCachedIdx = sDeadEndVStack.top();
nuclear@0 331 sDeadEndVStack.pop();
nuclear@0 332 if (piNumTriPtr[ iCachedIdx ] > 0) {
nuclear@0 333 ivdx = iCachedIdx;
nuclear@0 334 break;
nuclear@0 335 }
nuclear@0 336 }
nuclear@0 337
nuclear@0 338 if (-1 == ivdx) {
nuclear@0 339 // well, there isn't such a vertex. Simply get the next vertex in input order and
nuclear@0 340 // hope it is not too bad ...
nuclear@0 341 while (ics < (int)pMesh->mNumVertices) {
nuclear@0 342 ++ics;
nuclear@0 343 if (piNumTriPtr[ics] > 0) {
nuclear@0 344 ivdx = ics;
nuclear@0 345 break;
nuclear@0 346 }
nuclear@0 347 }
nuclear@0 348 }
nuclear@0 349 }
nuclear@0 350 }
nuclear@0 351 float fACMR2 = 0.0f;
nuclear@0 352 if (!DefaultLogger::isNullLogger()) {
nuclear@0 353 fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
nuclear@0 354
nuclear@0 355 // very intense verbose logging ... prepare for much text if there are many meshes
nuclear@0 356 if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
nuclear@0 357 char szBuff[128]; // should be sufficiently large in every case
nuclear@0 358
nuclear@0 359 ::sprintf(szBuff,"Mesh %i | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
nuclear@0 360 ((fACMR - fACMR2) / fACMR) * 100.f);
nuclear@0 361 DefaultLogger::get()->debug(szBuff);
nuclear@0 362 }
nuclear@0 363
nuclear@0 364 fACMR2 *= pMesh->mNumFaces;
nuclear@0 365 }
nuclear@0 366 // sort the output index buffer back to the input array
nuclear@0 367 piCSIter = piIBOutput;
nuclear@0 368 for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
nuclear@0 369 pcFace->mIndices[0] = *piCSIter++;
nuclear@0 370 pcFace->mIndices[1] = *piCSIter++;
nuclear@0 371 pcFace->mIndices[2] = *piCSIter++;
nuclear@0 372 }
nuclear@0 373
nuclear@0 374 // delete temporary storage
nuclear@0 375 delete[] piCachingStamps;
nuclear@0 376 delete[] piIBOutput;
nuclear@0 377 delete[] piCandidates;
nuclear@0 378
nuclear@0 379 return fACMR2;
nuclear@0 380 }