oculus1
diff libovr/Src/Kernel/OVR_Atomic.h @ 1:e2f9e4603129
added LibOVR and started a simple vr wrapper.
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 14 Sep 2013 16:14:59 +0300 |
parents | |
children | b069a5c27388 |
line diff
1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libovr/Src/Kernel/OVR_Atomic.h	Sat Sep 14 16:14:59 2013 +0300 1.3 @@ -0,0 +1,1 @@ 1.4 +/************************************************************************************ 1.5 1.6 PublicHeader: OVR.h 1.7 Filename : OVR_Atomic.h 1.8 Content : Contains atomic operations and inline fastest locking 1.9 functionality. Will contain #ifdefs for OS efficiency. 1.10 Have non-thread-safe implementation if not available. 1.11 Created : September 19, 2012 1.12 Notes : 1.13 1.14 Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved. 1.15 1.16 Use of this software is subject to the terms of the Oculus license 1.17 agreement provided at the time of installation or download, or which 1.18 otherwise accompanies this software in either electronic or hard copy form. 1.19 1.20 ************************************************************************************/ 1.21 #ifndef OVR_Atomic_h 1.22 #define OVR_Atomic_h 1.23 1.24 #include "OVR_Types.h" 1.25 1.26 // Include System thread functionality. 1.27 #if defined(OVR_OS_WIN32) 1.28 #include <windows.h> 1.29 #else 1.30 #include <pthread.h> 1.31 #endif 1.32 1.33 1.34 namespace OVR { 1.35 1.36 1.37 // ****** Declared classes 1.38 1.39 // If there is NO thread support we implement AtomicOps and 1.40 // Lock objects as no-ops. The other classes are not defined. 1.41 template<class C> class AtomicOps; 1.42 template<class T> class AtomicInt; 1.43 template<class T> class AtomicPtr; 1.44 1.45 class Lock; 1.46 1.47 1.48 //----------------------------------------------------------------------------------- 1.49 // ***** AtomicOps 1.50 1.51 // Atomic operations are provided by the AtomicOps template class, 1.52 // implemented through system-specific AtomicOpsRaw specializations. 1.53 // It provides several fundamental operations such as Exchange, ExchangeAdd 1.54 // CompareAndSet, and Store_Release.
Each function includes several memory 1.55 // synchronization versions, important for multiprocessing CPUs with weak 1.56 // memory consistency. The following memory fencing strategies are supported: 1.57 // 1.58 // - NoSync. No memory synchronization is done for atomic op. 1.59 // - Release. All other memory writes are completed before atomic op 1.60 // writes its results. 1.61 // - Acquire. Further memory reads are forced to wait until atomic op 1.62 // executes, guaranteeing that the right values will be seen. 1.63 // - Sync. A combination of Release and Acquire. 1.64 1.65 1.66 // *** AtomicOpsRaw 1.67 1.68 // AtomicOpsRaw is a specialized template that provides atomic operations 1.69 // used by AtomicOps. This class has two fundamental qualities: (1) it 1.70 // defines a type T of correct size, and (2) provides operations that work 1.71 // atomically, such as Exchange_Sync and CompareAndSet_Release. 1.72 1.73 // AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw. 1.74 // The primary thing it does is define sync class objects, whose destructor and 1.75 // constructor provide places to insert appropriate synchronization calls, on 1.76 // systems where such calls are necessary. So far, the breakdown is as follows: 1.77 // 1.78 // - X86 systems don't need custom syncs, since their exchange/atomic 1.79 // instructions are implicitly synchronized. 1.80 // - PowerPC requires lwsync/isync instructions that can use this mechanism. 1.81 // - If some other systems require a mechanism where syncing type is associated 1.82 // with a particular instruction, the default implementation (which implements 1.83 // all Sync, Acquire, and Release modes in terms of NoSync and fence) may not 1.84 // work. In that case it will need to be #ifdef-ed conditionally.
1.85 1.86 struct AtomicOpsRawBase 1.87 { 1.88 #if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_OS_WIN32) || defined(OVR_OS_IPHONE) 1.89 // Need to have empty constructor to avoid class 'unused' variable warning. 1.90 struct FullSync { inline FullSync() { } }; 1.91 struct AcquireSync { inline AcquireSync() { } }; 1.92 struct ReleaseSync { inline ReleaseSync() { } }; 1.93 1.94 #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) 1.95 struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } }; 1.96 struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } }; 1.97 struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; 1.98 1.99 #elif defined(OVR_CPU_MIPS) 1.100 struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } }; 1.101 struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } }; 1.102 struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; 1.103 1.104 #elif defined(OVR_CPU_ARM) 1.105 struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } }; 1.106 struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } }; 1.107 struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } }; 1.108 1.109 1.110 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4) 1.111 // __sync functions are already full sync 1.112 struct FullSync { inline FullSync() { } }; 1.113 struct AcquireSync { inline AcquireSync() { } }; 1.114 struct ReleaseSync { inline ReleaseSync() { } }; 1.115 #endif 1.116 }; 1.117 1.118 1.119 // 4-Byte raw data atomic op implementation class. 1.120 struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase 1.121 { 1.122 #if !defined(OVR_ENABLE_THREADS) 1.123 1.124 // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. 
1.125 typedef UInt32 T; 1.126 1.127 // *** Thread - Safe Atomic Versions. 1.128 1.129 #elif defined(OVR_OS_WIN32) 1.130 1.131 // Use special defined for VC6, where volatile is not used and 1.132 // InterlockedCompareExchange is declared incorrectly. 1.133 typedef LONG T; 1.134 #if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300) 1.135 typedef T* InterlockTPtr; 1.136 typedef LPVOID ET; 1.137 typedef ET* InterlockETPtr; 1.138 #else 1.139 typedef volatile T* InterlockTPtr; 1.140 typedef T ET; 1.141 typedef InterlockTPtr InterlockETPtr; 1.142 #endif 1.143 inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange((InterlockTPtr)p, val); } 1.144 inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd((InterlockTPtr)p, val); } 1.145 inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; } 1.146 1.147 #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) 1.148 typedef UInt32 T; 1.149 static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) 1.150 { 1.151 UInt32 ret; 1.152 1.153 asm volatile("1:\n\t" 1.154 "lwarx %[r],0,%[i]\n\t" 1.155 "stwcx. %[j],0,%[i]\n\t" 1.156 "bne- 1b\n" 1.157 : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); 1.158 1.159 return ret; 1.160 } 1.161 1.162 static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) 1.163 { 1.164 UInt32 dummy, ret; 1.165 1.166 asm volatile("1:\n\t" 1.167 "lwarx %[r],0,%[i]\n\t" 1.168 "add %[o],%[r],%[j]\n\t" 1.169 "stwcx. 
%[o],0,%[i]\n\t" 1.170 "bne- 1b\n" 1.171 : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); 1.172 1.173 return ret; 1.174 } 1.175 1.176 static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) 1.177 { 1.178 UInt32 ret; 1.179 1.180 asm volatile("1:\n\t" 1.181 "lwarx %[r],0,%[i]\n\t" 1.182 "cmpw 0,%[r],%[cmp]\n\t" 1.183 "mfcr %[r]\n\t" 1.184 "bne- 2f\n\t" 1.185 "stwcx. %[val],0,%[i]\n\t" 1.186 "bne- 1b\n\t" 1.187 "2:\n" 1.188 : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory"); 1.189 1.190 return (ret & 0x20000000) ? 1 : 0; 1.191 } 1.192 1.193 #elif defined(OVR_CPU_MIPS) 1.194 typedef UInt32 T; 1.195 1.196 static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) 1.197 { 1.198 UInt32 ret; 1.199 1.200 asm volatile("1:\n\t" 1.201 "ll %[r],0(%[i])\n\t" 1.202 "sc %[j],0(%[i])\n\t" 1.203 "beq %[j],$0,1b\n\t" 1.204 "nop \n" 1.205 : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); 1.206 1.207 return ret; 1.208 } 1.209 1.210 static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) 1.211 { 1.212 UInt32 ret; 1.213 1.214 asm volatile("1:\n\t" 1.215 "ll %[r],0(%[i])\n\t" 1.216 "addu %[j],%[r],%[j]\n\t" 1.217 "sc %[j],0(%[i])\n\t" 1.218 "beq %[j],$0,1b\n\t" 1.219 "nop \n" 1.220 : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); 1.221 1.222 return ret; 1.223 } 1.224 1.225 static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) 1.226 { 1.227 UInt32 ret, dummy; 1.228 1.229 asm volatile("1:\n\t" 1.230 "move %[r],$0\n\t" 1.231 "ll %[o],0(%[i])\n\t" 1.232 "bne %[o],%[c],2f\n\t" 1.233 "move %[r],%[v]\n\t" 1.234 "sc %[r],0(%[i])\n\t" 1.235 "beq %[r],$0,1b\n\t" 1.236 "nop \n\t" 1.237 "2:\n" 1.238 : "+m" (*i),[r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value) 1.239 : "cc", "memory"); 1.240 1.241 return ret; 1.242 } 1.243 1.244 #elif defined(OVR_CPU_ARM) 
&& defined(OVR_CC_ARM) 1.245 typedef UInt32 T; 1.246 1.247 static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) 1.248 { 1.249 for(;;) 1.250 { 1.251 T r = __ldrex(i); 1.252 if (__strex(j, i) == 0) 1.253 return r; 1.254 } 1.255 } 1.256 static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) 1.257 { 1.258 for(;;) 1.259 { 1.260 T r = __ldrex(i); 1.261 if (__strex(r + j, i) == 0) 1.262 return r; 1.263 } 1.264 } 1.265 1.266 static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) 1.267 { 1.268 for(;;) 1.269 { 1.270 T r = __ldrex(i); 1.271 if (r != c) 1.272 return 0; 1.273 if (__strex(value, i) == 0) 1.274 return 1; 1.275 } 1.276 } 1.277 1.278 #elif defined(OVR_CPU_ARM) 1.279 typedef UInt32 T; 1.280 1.281 static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) 1.282 { 1.283 UInt32 ret, dummy; 1.284 1.285 asm volatile("1:\n\t" 1.286 "ldrex %[r],[%[i]]\n\t" 1.287 "strex %[t],%[j],[%[i]]\n\t" 1.288 "cmp %[t],#0\n\t" 1.289 "bne 1b\n\t" 1.290 : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); 1.291 1.292 return ret; 1.293 } 1.294 1.295 static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) 1.296 { 1.297 UInt32 ret, dummy, test; 1.298 1.299 asm volatile("1:\n\t" 1.300 "ldrex %[r],[%[i]]\n\t" 1.301 "add %[o],%[r],%[j]\n\t" 1.302 "strex %[t],%[o],[%[i]]\n\t" 1.303 "cmp %[t],#0\n\t" 1.304 "bne 1b\n\t" 1.305 : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); 1.306 1.307 return ret; 1.308 } 1.309 1.310 static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) 1.311 { 1.312 UInt32 ret = 1, dummy, test; 1.313 1.314 asm volatile("1:\n\t" 1.315 "ldrex %[o],[%[i]]\n\t" 1.316 "cmp %[o],%[c]\n\t" 1.317 "bne 2f\n\t" 1.318 "strex %[r],%[v],[%[i]]\n\t" 1.319 "cmp %[r],#0\n\t" 1.320 "bne 1b\n\t" 1.321 "2:\n" 1.322 : "+m" (*i),[r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] 
"r" (i), [c] "r" (c), [v] "r" (value) 1.323 : "cc", "memory"); 1.324 1.325 return !ret; 1.326 } 1.327 1.328 #elif defined(OVR_CPU_X86) 1.329 typedef UInt32 T; 1.330 1.331 static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) 1.332 { 1.333 asm volatile("xchgl %1,%[i]\n" 1.334 : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory"); 1.335 1.336 return j; 1.337 } 1.338 1.339 static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) 1.340 { 1.341 asm volatile("lock; xaddl %1,%[i]\n" 1.342 : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory"); 1.343 1.344 return j; 1.345 } 1.346 1.347 static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) 1.348 { 1.349 UInt32 ret; 1.350 1.351 asm volatile("lock; cmpxchgl %[v],%[i]\n" 1.352 : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory"); 1.353 1.354 return (ret == c); 1.355 } 1.356 1.357 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) 1.358 1.359 typedef UInt32 T; 1.360 1.361 static inline T Exchange_NoSync(volatile T *i, T j) 1.362 { 1.363 T v; 1.364 do { 1.365 v = *i; 1.366 } while (!__sync_bool_compare_and_swap(i, v, j)); 1.367 return v; 1.368 } 1.369 1.370 static inline T ExchangeAdd_NoSync(volatile T *i, T j) 1.371 { 1.372 return __sync_fetch_and_add(i, j); 1.373 } 1.374 1.375 static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) 1.376 { 1.377 return __sync_bool_compare_and_swap(i, c, value); 1.378 } 1.379 1.380 #endif // OS 1.381 }; 1.382 1.383 1.384 // 8-Byte raw data data atomic op implementation class. 1.385 // Currently implementation is provided only on systems with 64-bit pointers. 1.386 struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase 1.387 { 1.388 #if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS) 1.389 1.390 // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. 1.391 typedef UInt64 T; 1.392 1.393 // *** Thread - Safe OS specific versions. 
1.394 #elif defined(OVR_OS_WIN32) 1.395 1.396 // This is only for 64-bit systems. 1.397 typedef LONG64 T; 1.398 typedef volatile T* InterlockTPtr; 1.399 inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange64((InterlockTPtr)p, val); } 1.400 inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd64((InterlockTPtr)p, val); } 1.401 inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; } 1.402 1.403 #elif defined(OVR_CPU_PPC64) 1.404 1.405 typedef UInt64 T; 1.406 1.407 static inline UInt64 Exchange_NoSync(volatile UInt64 *i, UInt64 j) 1.408 { 1.409 UInt64 dummy, ret; 1.410 1.411 asm volatile("1:\n\t" 1.412 "ldarx %[r],0,%[i]\n\t" 1.413 "mr %[o],%[j]\n\t" 1.414 "stdcx. %[o],0,%[i]\n\t" 1.415 "bne- 1b\n" 1.416 : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); 1.417 1.418 return ret; 1.419 } 1.420 1.421 static inline UInt64 ExchangeAdd_NoSync(volatile UInt64 *i, UInt64 j) 1.422 { 1.423 UInt64 dummy, ret; 1.424 1.425 asm volatile("1:\n\t" 1.426 "ldarx %[r],0,%[i]\n\t" 1.427 "add %[o],%[r],%[j]\n\t" 1.428 "stdcx. %[o],0,%[i]\n\t" 1.429 "bne- 1b\n" 1.430 : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); 1.431 1.432 return ret; 1.433 } 1.434 1.435 static inline bool CompareAndSet_NoSync(volatile UInt64 *i, UInt64 c, UInt64 value) 1.436 { 1.437 UInt64 ret, dummy; 1.438 1.439 asm volatile("1:\n\t" 1.440 "ldarx %[r],0,%[i]\n\t" 1.441 "cmpw 0,%[r],%[cmp]\n\t" 1.442 "mfcr %[r]\n\t" 1.443 "bne- 2f\n\t" 1.444 "stdcx. %[val],0,%[i]\n\t" 1.445 "bne- 1b\n\t" 1.446 "2:\n" 1.447 : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc"); 1.448 1.449 return (ret & 0x20000000) ? 
1 : 0; 1.450 } 1.451 1.452 #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) 1.453 1.454 typedef UInt64 T; 1.455 1.456 static inline T Exchange_NoSync(volatile T *i, T j) 1.457 { 1.458 T v; 1.459 do { 1.460 v = *i; 1.461 } while (!__sync_bool_compare_and_swap(i, v, j)); 1.462 return v; 1.463 } 1.464 1.465 static inline T ExchangeAdd_NoSync(volatile T *i, T j) 1.466 { 1.467 return __sync_fetch_and_add(i, j); 1.468 } 1.469 1.470 static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) 1.471 { 1.472 return __sync_bool_compare_and_swap(i, c, value); 1.473 } 1.474 1.475 #endif // OS 1.476 }; 1.477 1.478 1.479 // Default implementation for AtomicOpsRaw; provides implementation of mem-fenced 1.480 // atomic operations where fencing is done with a sync object wrapped around a NoSync 1.481 // operation implemented in the base class. If such implementation is not possible 1.482 // on a given platform, #ifdefs can be used to disable it and then op functions can be 1.483 // implemented individually in the appropriate AtomicOpsRaw<size> class. 1.484 1.485 template<class O> 1.486 struct AtomicOpsRaw_DefImpl : public O 1.487 { 1.488 typedef typename O::T O_T; 1.489 typedef typename O::FullSync O_FullSync; 1.490 typedef typename O::AcquireSync O_AcquireSync; 1.491 typedef typename O::ReleaseSync O_ReleaseSync; 1.492 1.493 // If there is no thread support, provide the default implementation. In this case, 1.494 // the base class (0) must still provide the T declaration. 1.495 #ifndef OVR_ENABLE_THREADS 1.496 1.497 // Atomic exchange of val with argument. Returns old val. 1.498 inline static O_T Exchange_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p = val; return old; } 1.499 // Adds a new val to argument; returns its old val. 1.500 inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; } 1.501 // Compares the argument data with 'c' val. 
1.502 // If succeeded, stores val into '*p' and returns true; otherwise returns false. 1.503 inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p==c) { *p = val; return 1; } return 0; } 1.504 1.505 #endif 1.506 1.507 // If NoSync wrapped implementation may not be possible, this block should be 1.508 // replaced with per-function implementation in O. 1.509 // "AtomicOpsRaw_DefImpl<O>::" prefix in calls below. 1.510 inline static O_T Exchange_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); } 1.511 inline static O_T Exchange_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); } 1.512 inline static O_T Exchange_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); } 1.513 inline static O_T ExchangeAdd_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); } 1.514 inline static O_T ExchangeAdd_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); } 1.515 inline static O_T ExchangeAdd_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); } 1.516 inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); } 1.517 inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); } 1.518 inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); } 1.519
1.520 // Loads and stores with memory fence. These have only the relevant versions. 1.521 #ifdef OVR_CPU_X86 1.522 // On X86, Store_Release is implemented as exchange. Note that we can also 1.523 // consider 'sfence' in the future, although it is not as compatible with older CPUs. 1.524 inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); } 1.525 #else 1.526 inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; } 1.527 #endif 1.528 inline static O_T Load_Acquire(const volatile O_T* p) { O_AcquireSync sync; OVR_UNUSED(sync); return *p; } 1.529 }; 1.530 1.531 1.532 template<int size> 1.533 struct AtomicOpsRaw : public AtomicOpsRawBase { }; 1.534 1.535 template<> 1.536 struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl> 1.537 { 1.538 // Ensure that assigned type size is correct. 1.539 AtomicOpsRaw() 1.540 { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>::T) == 4); } 1.541 }; 1.542 template<> 1.543 struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl> 1.544 { 1.545 AtomicOpsRaw() 1.546 { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>::T) == 8); } 1.547 }; 1.548 1.549 1.550 // *** AtomicOps - implementation of atomic Ops for specified class 1.551 1.552 // Implements atomic ops on a class, provided that the object is either 1.553 // 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations 1.554 // available). Relies on AtomicOpsRaw for much of implementation. 1.555 1.556 template<class C> 1.557 class AtomicOps 1.558 { 1.559 typedef AtomicOpsRaw<sizeof(C)> Ops; 1.560 typedef typename Ops::T T; 1.561 typedef volatile typename Ops::T* PT; 1.562 // We cast through unions to (1) avoid pointer size compiler warnings 1.563 // and (2) ensure that there are no problems with strict pointer aliasing. 
1.564 union C2T_union { C c; T t; }; 1.565 1.566 public: 1.567 // General purpose implementation for standard syncs. 1.568 inline static C Exchange_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; } 1.569 inline static C Exchange_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; } 1.570 inline static C Exchange_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; } 1.571 inline static C Exchange_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; } 1.572 inline static C ExchangeAdd_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; } 1.573 inline static C ExchangeAdd_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; } 1.574 inline static C ExchangeAdd_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; } 1.575 inline static C ExchangeAdd_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; } 1.576 inline static bool CompareAndSet_Sync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); } 1.577 inline static bool CompareAndSet_Release(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); } 1.578 inline static bool CompareAndSet_Relse(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); } 1.579 inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); } 1.580 // Loads and stores with memory fence. 
These have only the relevant versions. 1.581 inline static void Store_Release(volatile C* p, C val) { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); } 1.582 inline static C Load_Acquire(const volatile C* p) { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; } 1.583 }; 1.584 1.585 1.586 1.587 // Atomic value base class - implements operations shared for integers and pointers. 1.588 template<class T> 1.589 class AtomicValueBase 1.590 { 1.591 protected: 1.592 typedef AtomicOps<T> Ops; 1.593 public: 1.594 1.595 volatile T Value; 1.596 1.597 inline AtomicValueBase() { } 1.598 explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); } 1.599 1.600 // Most libraries (TBB and Joshua Scholar's) library do not do Load_Acquire 1.601 // here, since most algorithms do not require atomic loads. Needs some research. 1.602 inline operator T() const { return Value; } 1.603 1.604 // *** Standard Atomic inlines 1.605 inline T Exchange_Sync(T val) { return Ops::Exchange_Sync(&Value, val); } 1.606 inline T Exchange_Release(T val) { return Ops::Exchange_Release(&Value, val); } 1.607 inline T Exchange_Acquire(T val) { return Ops::Exchange_Acquire(&Value, val); } 1.608 inline T Exchange_NoSync(T val) { return Ops::Exchange_NoSync(&Value, val); } 1.609 inline bool CompareAndSet_Sync(T c, T val) { return Ops::CompareAndSet_Sync(&Value, c, val); } 1.610 inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); } 1.611 inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Relse(&Value, c, val); } 1.612 inline bool CompareAndSet_NoSync(T c, T val) { return Ops::CompareAndSet_NoSync(&Value, c, val); } 1.613 // Load & Store. 
1.614 inline void Store_Release(T val) { Ops::Store_Release(&Value, val); } 1.615 inline T Load_Acquire() const { return Ops::Load_Acquire(&Value); } 1.616 }; 1.617 1.618 1.619 // ***** AtomicPtr - Atomic pointer template 1.620 1.621 // This pointer class supports atomic assignments with release, 1.622 // increment / decrement operations, and conditional compare + set. 1.623 1.624 template<class T> 1.625 class AtomicPtr : public AtomicValueBase<T*> 1.626 { 1.627 typedef typename AtomicValueBase<T*>::Ops Ops; 1.628 1.629 public: 1.630 // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor. 1.631 inline AtomicPtr() : AtomicValueBase<T*>() { this->Value = 0; } 1.632 explicit inline AtomicPtr(T* val) : AtomicValueBase<T*>(val) { } 1.633 1.634 // Pointer access. 1.635 inline T* operator -> () const { return this->Load_Acquire(); } 1.636 1.637 // It looks like it is convenient to have Load_Acquire characteristics 1.638 // for this, since that is convenient for algorithms such as linked 1.639 // list traversals that can be added to by another thread. 1.640 inline operator T* () const { return this->Load_Acquire(); } 1.641 1.642 1.643 // *** Standard Atomic inlines (applicable to pointers) 1.644 1.645 // ExchangeAdd considers pointer size for pointers.
1.646 template<class I> 1.647 inline T* ExchangeAdd_Sync(I incr) { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); } 1.648 template<class I> 1.649 inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); } 1.650 template<class I> 1.651 inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); } 1.652 template<class I> 1.653 inline T* ExchangeAdd_NoSync(I incr) { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); } 1.654 1.655 // *** Atomic Operators 1.656 1.657 inline T* operator = (T* val) { this->Store_Release(val); return val; } 1.658 1.659 template<class I> 1.660 inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; } 1.661 template<class I> 1.662 inline T* operator -= (I val) { return operator += (-val); } 1.663 1.664 inline T* operator ++ () { return ExchangeAdd_Sync(1) + 1; } 1.665 inline T* operator -- () { return ExchangeAdd_Sync(-1) - 1; } 1.666 inline T* operator ++ (int) { return ExchangeAdd_Sync(1); } 1.667 inline T* operator -- (int) { return ExchangeAdd_Sync(-1); } 1.668 }; 1.669 1.670 1.671 // ***** AtomicInt - Atomic integer template 1.672 1.673 // Implements an atomic integer type; the exact type to use is provided 1.674 // as an argument. Supports atomic Acquire / Release semantics, atomic 1.675 // arithmetic operations, and atomic conditional compare + set. 
1.676 1.677 template<class T> 1.678 class AtomicInt : public AtomicValueBase<T> 1.679 { 1.680 typedef typename AtomicValueBase<T>::Ops Ops; 1.681 1.682 public: 1.683 inline AtomicInt() : AtomicValueBase<T>() { } 1.684 explicit inline AtomicInt(T val) : AtomicValueBase<T>(val) { } 1.685 1.686 1.687 // *** Standard Atomic inlines (applicable to int) 1.688 inline T ExchangeAdd_Sync(T val) { return Ops::ExchangeAdd_Sync(&this->Value, val); } 1.689 inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); } 1.690 inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); } 1.691 inline T ExchangeAdd_NoSync(T val) { return Ops::ExchangeAdd_NoSync(&this->Value, val); } 1.692 // These increments could be more efficient because they don't return a value. 1.693 inline void Increment_Sync() { ExchangeAdd_Sync((T)1); } 1.694 inline void Increment_Release() { ExchangeAdd_Release((T)1); } 1.695 inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); } 1.696 inline void Increment_NoSync() { ExchangeAdd_NoSync((T)1); } 1.697 1.698 // *** Atomic Operators 1.699 1.700 inline T operator = (T val) { this->Store_Release(val); return val; } 1.701 inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; } 1.702 inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; } 1.703 1.704 inline T operator ++ () { return ExchangeAdd_Sync((T)1) + 1; } 1.705 inline T operator -- () { return ExchangeAdd_Sync(((T)0)-1) - 1; } 1.706 inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); } 1.707 inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); } 1.708 1.709 // More complex atomic operations. Leave it to compiler whether to optimize them or not. 
1.710 T operator &= (T arg) 1.711 { 1.712 T comp, newVal; 1.713 do { 1.714 comp = this->Value; 1.715 newVal = comp & arg; 1.716 } while(!this->CompareAndSet_Sync(comp, newVal)); 1.717 return newVal; 1.718 } 1.719 1.720 T operator |= (T arg) 1.721 { 1.722 T comp, newVal; 1.723 do { 1.724 comp = this->Value; 1.725 newVal = comp | arg; 1.726 } while(!this->CompareAndSet_Sync(comp, newVal)); 1.727 return newVal; 1.728 } 1.729 1.730 T operator ^= (T arg) 1.731 { 1.732 T comp, newVal; 1.733 do { 1.734 comp = this->Value; 1.735 newVal = comp ^ arg; 1.736 } while(!this->CompareAndSet_Sync(comp, newVal)); 1.737 return newVal; 1.738 } 1.739 1.740 T operator *= (T arg) 1.741 { 1.742 T comp, newVal; 1.743 do { 1.744 comp = this->Value; 1.745 newVal = comp * arg; 1.746 } while(!this->CompareAndSet_Sync(comp, newVal)); 1.747 return newVal; 1.748 } 1.749 1.750 T operator /= (T arg) 1.751 { 1.752 T comp, newVal; 1.753 do { 1.754 comp = this->Value; 1.755 newVal = comp / arg; 1.756 } while(!CompareAndSet_Sync(comp, newVal)); 1.757 return newVal; 1.758 } 1.759 1.760 T operator >>= (unsigned bits) 1.761 { 1.762 T comp, newVal; 1.763 do { 1.764 comp = this->Value; 1.765 newVal = comp >> bits; 1.766 } while(!CompareAndSet_Sync(comp, newVal)); 1.767 return newVal; 1.768 } 1.769 1.770 T operator <<= (unsigned bits) 1.771 { 1.772 T comp, newVal; 1.773 do { 1.774 comp = this->Value; 1.775 newVal = comp << bits; 1.776 } while(!this->CompareAndSet_Sync(comp, newVal)); 1.777 return newVal; 1.778 } 1.779 }; 1.780 1.781 1.782 1.783 //----------------------------------------------------------------------------------- 1.784 // ***** Lock 1.785 1.786 // Lock is a simplest and most efficient mutual-exclusion lock class. 1.787 // Unlike Mutex, it cannot be waited on. 1.788 1.789 class Lock 1.790 { 1.791 // NOTE: Locks are not allocatable and they themselves should not allocate 1.792 // memory by standard means. This is the case because StandardAllocator 1.793 // relies on this class. 
1.794 // Make 'delete' private. Don't do this for 'new' since it can be redefined. 1.795 void operator delete(void*) {} 1.796 1.797 1.798 // *** Lock implementation for various platforms. 1.799 1.800 #if !defined(OVR_ENABLE_THREADS) 1.801 1.802 public: 1.803 // With no thread support, lock does nothing. 1.804 inline Lock() { } 1.805 inline Lock(unsigned) { } 1.806 inline ~Lock() { } 1.807 inline void DoLock() { } 1.808 inline void Unlock() { } 1.809 1.810 // Windows. 1.811 #elif defined(OVR_OS_WIN32) 1.812 1.813 CRITICAL_SECTION cs; 1.814 public: 1.815 Lock(unsigned spinCount = 0); 1.816 ~Lock(); 1.817 // Locking functions. 1.818 inline void DoLock() { ::EnterCriticalSection(&cs); } 1.819 inline void Unlock() { ::LeaveCriticalSection(&cs); } 1.820 1.821 #else 1.822 pthread_mutex_t mutex; 1.823 1.824 public: 1.825 static pthread_mutexattr_t RecursiveAttr; 1.826 static bool RecursiveAttrInit; 1.827 1.828 Lock (unsigned dummy = 0) 1.829 { 1.830 if (!RecursiveAttrInit) 1.831 { 1.832 pthread_mutexattr_init(&RecursiveAttr); 1.833 pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE); 1.834 RecursiveAttrInit = 1; 1.835 } 1.836 pthread_mutex_init(&mutex,&RecursiveAttr); 1.837 } 1.838 ~Lock () { pthread_mutex_destroy(&mutex); } 1.839 inline void DoLock() { pthread_mutex_lock(&mutex); } 1.840 inline void Unlock() { pthread_mutex_unlock(&mutex); } 1.841 1.842 #endif // OVR_ENABLE_THREADS 1.843 1.844 1.845 public: 1.846 // Locker class, used for automatic locking 1.847 class Locker 1.848 { 1.849 public: 1.850 Lock *pLock; 1.851 inline Locker(Lock *plock) 1.852 { pLock = plock; pLock->DoLock(); } 1.853 inline ~Locker() 1.854 { pLock->Unlock(); } 1.855 }; 1.856 }; 1.857 1.858 1.859 1.860 } // OVR 1.861 1.862 #endif 1.863 \ No newline at end of file