ovr_sdk
diff LibOVR/Src/Kernel/OVR_Atomic.h @ 0:1b39a1b46319
initial 0.4.4
author:    John Tsiombikas <nuclear@member.fsf.org>
date:      Wed, 14 Jan 2015 06:51:16 +0200
parents:   (none)
children:  (none)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LibOVR/Src/Kernel/OVR_Atomic.h	Wed Jan 14 06:51:16 2015 +0200
@@ -0,0 +1,915 @@

/************************************************************************************

PublicHeader:   OVR_Kernel.h
Filename    :   OVR_Atomic.h
Content     :   Contains atomic operations and fast inline locking
                functionality. Will contain #ifdefs for OS efficiency.
                Has a non-thread-safe implementation if threads are unavailable.
Created     :   September 19, 2012
Notes       :

Copyright   :   Copyright 2014 Oculus VR, LLC All Rights reserved.

Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License");
you may not use the Oculus VR Rift SDK except in compliance with the License,
which is provided at the time of installation or download, or which
otherwise accompanies this software in either electronic or hard copy form.

You may obtain a copy of the License at

http://www.oculusvr.com/licenses/LICENSE-3.2

Unless required by applicable law or agreed to in writing, the Oculus VR SDK
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

************************************************************************************/

#ifndef OVR_Atomic_h
#define OVR_Atomic_h

#include "OVR_Types.h"

// Include System thread functionality.
#if defined(OVR_OS_MS) && !defined(OVR_OS_MS_MOBILE)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <Windows.h>
#else
#include <pthread.h>
#endif

#ifdef OVR_CC_MSVC
#include <intrin.h>
#pragma intrinsic(_ReadBarrier, _WriteBarrier, _ReadWriteBarrier)
#endif

namespace OVR {


// ****** Declared classes

// If there is NO thread support we implement AtomicOps and
// Lock objects as no-ops. The other classes are not defined.
template<class C> class AtomicOps;
template<class T> class AtomicInt;
template<class T> class AtomicPtr;

class Lock;


//-----------------------------------------------------------------------------------
// ***** AtomicOps

// Atomic operations are provided by the AtomicOps template class,
// implemented through system-specific AtomicOpsRaw specializations.
// It provides several fundamental operations such as Exchange, ExchangeAdd,
// CompareAndSet, and Store_Release. Each function includes several memory
// synchronization versions, important for multiprocessing CPUs with weak
// memory consistency. The following memory fencing strategies are supported:
//
//  - NoSync.  No memory synchronization is done for the atomic op.
//  - Release. All other memory writes are completed before the atomic op
//             writes its results.
//  - Acquire. Further memory reads are forced to wait until the atomic op
//             executes, guaranteeing that the right values will be seen.
//  - Sync.    A combination of Release and Acquire.
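// A minimal sketch of the Release/Acquire pairing described above: a producer
// fills a payload and publishes a flag with Release semantics; the consumer
// spins on an Acquire load, so the payload write is guaranteed to be visible.
// The 'payload'/'ready' names and functions are illustrative, not part of this
// header.
#if 0
uint32_t          payload = 0;
volatile uint32_t ready   = 0;

void Producer()
{
    payload = 42;                                   // plain write
    AtomicOps<uint32_t>::Store_Release(&ready, 1);  // publish: prior writes complete first
}

uint32_t Consumer()
{
    while (AtomicOps<uint32_t>::Load_Acquire(&ready) == 0)  // acquire: later reads wait
    { }
    return payload;                                 // guaranteed to observe 42
}
#endif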


// *** AtomicOpsRaw

// AtomicOpsRaw is a specialized template that provides atomic operations
// used by AtomicOps. This class has two fundamental qualities: (1) it
// defines a type T of correct size, and (2) provides operations that work
// atomically, such as Exchange_Sync and CompareAndSet_Release.

// The AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw.
// The primary thing it does is define sync class objects, whose constructor and
// destructor provide places to insert appropriate synchronization calls, on
// systems where such calls are necessary. So far, the breakdown is as follows:
//
//  - X86 systems don't need custom syncs, since their exchange/atomic
//    instructions are implicitly synchronized.
//  - PowerPC requires lwsync/isync instructions that can use this mechanism.
//  - If some other systems require a mechanism where the syncing type is associated
//    with a particular instruction, the default implementation (which implements
//    all Sync, Acquire, and Release modes in terms of NoSync and fence) may not
//    work. In that case it will need to be #ifdef-ed conditionally.

struct AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_CPU_X86_64)
    // Need to have empty constructor to avoid class 'unused' variable warning.
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_MIPS)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_ARM) // Includes Android and iOS.
    struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } };

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4)
    // __sync functions are already full sync
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };
#endif
};
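// The sync structs above are used as RAII guards: constructing one emits the
// "before" fence and destroying it emits the "after" fence, bracketing a raw
// NoSync primitive. A sketch of the idiom (the Foo_* names are hypothetical;
// the real wrappers appear in AtomicOpsRaw_DefImpl below):
#if 0
inline uint32_t Foo_Acquire(volatile uint32_t* p, uint32_t val)
{
    AtomicOpsRawBase::AcquireSync fence; // dtor runs isync/dmb after the op completes
    return Foo_NoSync(p, val);           // raw atomic op with no ordering of its own
}
#endif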


// 4-Byte raw data atomic op implementation class.
struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint32_t T;

    // *** Thread-Safe Atomic Versions.

#elif defined(OVR_OS_MS)

    // Use special defines for VC6, where volatile is not used and
    // InterlockedCompareExchange is declared incorrectly.
    typedef LONG T;
#if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300)
    typedef T* InterlockTPtr;
    typedef LPVOID ET;
    typedef ET* InterlockETPtr;
#else
    typedef volatile T* InterlockTPtr;
    typedef T ET;
    typedef InterlockTPtr InterlockETPtr;
#endif
    inline static T     Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange((InterlockTPtr)p, val); }
    inline static T     ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd((InterlockTPtr)p, val); }
    inline static bool  CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; }

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    typedef uint32_t T;
    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "stwcx. %[j],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t dummy, ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stwcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stwcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CPU_MIPS)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll     %[r],0(%[i])\n\t"
                     "sc     %[j],0(%[i])\n\t"
                     "beq    %[j],$0,1b\n\t"
                     "nop    \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll     %[r],0(%[i])\n\t"
                     "addu   %[j],%[r],%[j]\n\t"
                     "sc     %[j],0(%[i])\n\t"
                     "beq    %[j],$0,1b\n\t"
                     "nop    \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "move   %[r],$0\n\t"
                     "ll     %[o],0(%[i])\n\t"
                     "bne    %[o],%[c],2f\n\t"
                     "move   %[r],%[v]\n\t"
                     "sc     %[r],0(%[i])\n\t"
                     "beq    %[r],$0,1b\n\t"
                     "nop    \n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value)
                     : "cc", "memory");

        return ret;
    }

#elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(j, i) == 0)
                return r;
        }
    }
    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(r + j, i) == 0)
                return r;
        }
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (r != c)
                return 0;
            if (__strex(value, i) == 0)
                return 1;
        }
    }

#elif defined(OVR_CPU_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldrex  %[r],[%[i]]\n\t"
                     "strex  %[t],%[j],[%[i]]\n\t"
                     "cmp    %[t],#0\n\t"
                     "bne    1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex  %[r],[%[i]]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "strex  %[t],%[o],[%[i]]\n\t"
                     "cmp    %[t],#0\n\t"
                     "bne    1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret = 1, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex  %[o],[%[i]]\n\t"
                     "cmp    %[o],%[c]\n\t"
                     "bne    2f\n\t"
                     "strex  %[r],%[v],[%[i]]\n\t"
                     "cmp    %[r],#0\n\t"
                     "bne    1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value)
                     : "cc", "memory");

        return !ret;
    }

#elif defined(OVR_CPU_X86)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("xchgl %1,%[i]\n"
                     : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory");

        return j;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("lock; xaddl %1,%[i]\n"
                     : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory");

        return j;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("lock; cmpxchgl %[v],%[i]\n"
                     : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory");

        return (ret == c);
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint32_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};
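// Every port above reduces to the same three primitives (Exchange, ExchangeAdd,
// CompareAndSet). Any other read-modify-write operation can be synthesized from
// CompareAndSet with a retry loop. A sketch (the AtomicMax_NoSync helper is
// hypothetical, not part of this header):
#if 0
inline uint32_t AtomicMax_NoSync(volatile uint32_t* p, uint32_t val)
{
    uint32_t old;
    do {
        old = *p;                   // snapshot the current value
        if (old >= val)
            return old;             // already large enough; nothing to store
    } while (!AtomicOpsRaw_4ByteImpl::CompareAndSet_NoSync(p, old, val));
    return old;                     // we installed val; return the prior value
}
#endif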


// 8-Byte raw data atomic op implementation class.
// Currently an implementation is provided only on systems with 64-bit pointers.
struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint64_t T;

    // *** Thread-Safe OS specific versions.
#elif defined(OVR_OS_MS)

    // This is only for 64-bit systems.
    typedef LONG64      T;
    typedef volatile T* InterlockTPtr;
    inline static T     Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange64((InterlockTPtr)p, val); }
    inline static T     ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd64((InterlockTPtr)p, val); }
    inline static bool  CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; }

#elif defined(OVR_CPU_PPC64)

    typedef uint64_t T;

    static inline uint64_t Exchange_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "mr     %[o],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline uint64_t ExchangeAdd_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint64_t *i, uint64_t c, uint64_t value)
    {
        uint64_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stdcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint64_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};
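// Note that on 32-bit threaded builds (no OVR_64BIT_POINTERS) the struct above
// only defines T, not the ops, so 8-byte atomics fail to compile there and
// should be guarded at the use site. A sketch (hypothetical FrameCounter):
#if 0
#if defined(OVR_64BIT_POINTERS)
AtomicInt<uint64_t> FrameCounter;   // full 8-byte atomic support
#else
AtomicInt<uint32_t> FrameCounter;   // fall back to 4-byte ops
#endif
#endif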


// Default implementation for AtomicOpsRaw; provides implementation of mem-fenced
// atomic operations where fencing is done with a sync object wrapped around a NoSync
// operation implemented in the base class. If such an implementation is not possible
// on a given platform, #ifdefs can be used to disable it and then op functions can be
// implemented individually in the appropriate AtomicOpsRaw<size> class.

template<class O>
struct AtomicOpsRaw_DefImpl : public O
{
    typedef typename O::T O_T;
    typedef typename O::FullSync    O_FullSync;
    typedef typename O::AcquireSync O_AcquireSync;
    typedef typename O::ReleaseSync O_ReleaseSync;

    // If there is no thread support, provide the default implementation. In this case,
    // the base class (O) must still provide the T declaration.
#ifndef OVR_ENABLE_THREADS

    // Atomic exchange of val with argument. Returns old val.
    inline static O_T   Exchange_NoSync(volatile O_T* p, O_T val)    { O_T old = *p; *p = val; return old; }
    // Adds a new val to argument; returns its old val.
    inline static O_T   ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; }
    // Compares the argument data with 'c' val.
    // If it succeeds, stores val into '*p' and returns true; otherwise returns false.
    inline static bool  CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p==c) { *p = val; return 1; } return 0; }

#endif

    // If a NoSync-wrapped implementation is not possible, this block should be
    // replaced with per-function implementations in O. Note the
    // "AtomicOpsRaw_DefImpl<O>::" prefix in the calls below.
    inline static O_T   Exchange_Sync(volatile O_T* p, O_T val)                { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T   Exchange_Release(volatile O_T* p, O_T val)             { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T   Exchange_Acquire(volatile O_T* p, O_T val)             { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T   ExchangeAdd_Sync(volatile O_T* p, O_T val)             { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T   ExchangeAdd_Release(volatile O_T* p, O_T val)          { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T   ExchangeAdd_Acquire(volatile O_T* p, O_T val)          { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static bool  CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val)    { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); }
    inline static bool  CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); }
    inline static bool  CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p,c,val); }

    // Loads and stores with memory fence. These have only the relevant versions.
#ifdef OVR_CPU_X86
    // On X86, Store_Release is implemented as exchange. Note that we can also
    // consider 'sfence' in the future, although it is not as compatible with older CPUs.
    inline static void  Store_Release(volatile O_T* p, O_T val)  { Exchange_Release(p, val); }
#else
    inline static void  Store_Release(volatile O_T* p, O_T val)  { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; }
#endif
    inline static O_T   Load_Acquire(const volatile O_T* p)
    {
        O_AcquireSync sync;
        OVR_UNUSED(sync);

#if defined(OVR_CC_MSVC)
        _ReadBarrier(); // Compiler fence and load barrier
#elif defined(OVR_CC_INTEL)
        __memory_barrier(); // Compiler fence
#else
        // GCC-compatible:
        asm volatile ("" : : : "memory"); // Compiler fence
#endif

        return *p;
    }
};
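// For comparison, the fencing modes synthesized here correspond roughly to the
// C++11 memory orders. This header predates a C++11 requirement, so the mapping
// below is an approximation for orientation, not an equivalence used by the SDK:
#if 0
#include <atomic>

inline void MemoryOrderComparison()
{
    std::atomic<uint32_t> a(0);
    a.exchange(1, std::memory_order_seq_cst);  // ~ Exchange_Sync
    a.exchange(1, std::memory_order_release);  // ~ Exchange_Release
    a.exchange(1, std::memory_order_acquire);  // ~ Exchange_Acquire
    a.exchange(1, std::memory_order_relaxed);  // ~ Exchange_NoSync
    a.store(1, std::memory_order_release);     // ~ Store_Release
    (void)a.load(std::memory_order_acquire);   // ~ Load_Acquire
}
#endif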


template<int size>
struct AtomicOpsRaw : public AtomicOpsRawBase { };

template<>
struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>
{
    // Ensure that the assigned type size is correct.
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>::T) == 4); }
};
template<>
struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>
{
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>::T) == 8); }
};
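// AtomicOps (below) selects an implementation purely by sizeof(C):
// AtomicOpsRaw<4> for 4-byte types and AtomicOpsRaw<8> for 8-byte types. Any
// other size picks the unspecialized AtomicOpsRaw<size>, which defines no T
// and no ops, so misuse fails at compile time. A sketch:
#if 0
AtomicOpsRaw<4>::T t4;  // LONG or uint32_t, depending on the platform branch
AtomicOpsRaw<8>::T t8;  // LONG64 or uint64_t
AtomicOpsRaw<2>::T t2;  // error: no T in the unspecialized template
#endif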


// *** AtomicOps - implementation of atomic Ops for specified class

// Implements atomic ops on a class, provided that the object is either
// 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations
// available). Relies on AtomicOpsRaw for much of the implementation.

template<class C>
class AtomicOps
{
    typedef AtomicOpsRaw<sizeof(C)>    Ops;
    typedef typename Ops::T            T;
    typedef volatile typename Ops::T*  PT;
    // We cast through unions to (1) avoid pointer size compiler warnings
    // and (2) ensure that there are no problems with strict pointer aliasing.
    union C2T_union { C c; T t; };

public:
    // General purpose implementation for standard syncs.
    inline static C     Exchange_Sync(volatile C* p, C val)              { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; }
    inline static C     Exchange_Release(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; }
    inline static C     Exchange_Acquire(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; }
    inline static C     Exchange_NoSync(volatile C* p, C val)            { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; }
    inline static C     ExchangeAdd_Sync(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; }
    inline static C     ExchangeAdd_Release(volatile C* p, C val)        { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; }
    inline static C     ExchangeAdd_Acquire(volatile C* p, C val)        { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; }
    inline static C     ExchangeAdd_NoSync(volatile C* p, C val)         { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; }
    inline static bool  CompareAndSet_Sync(volatile C* p, C c, C val)    { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); }
    inline static bool  CompareAndSet_Release(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); }
    inline static bool  CompareAndSet_Acquire(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
    inline static bool  CompareAndSet_NoSync(volatile C* p, C c, C val)  { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); }

    // Loads and stores with memory fence. These have only the relevant versions.
    inline static void  Store_Release(volatile C* p, C val)              { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); }
    inline static C     Load_Acquire(const volatile C* p)                { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; }

    // Deprecated typo error:
    inline static bool  CompareAndSet_Relse(volatile C* p, C c, C val)   { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
};
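// The union cast lets AtomicOps work on any 4- or 8-byte trivially copyable
// type, not just integers. A sketch using a float (the SharedGain/SetGain names
// are illustrative; the bit pattern is simply moved through the integer ops):
#if 0
volatile float SharedGain = 1.0f;

void SetGain(float g)
{
    AtomicOps<float>::Store_Release(&SharedGain, g);  // stored via its uint32_t bits
}

float GetGain()
{
    return AtomicOps<float>::Load_Acquire(&SharedGain);
}
#endif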


// Atomic value base class - implements operations shared for integers and pointers.
template<class T>
class AtomicValueBase
{
protected:
    typedef AtomicOps<T> Ops;
public:

    volatile T  Value;

    inline AtomicValueBase()                { }
    explicit inline AtomicValueBase(T val)  { Ops::Store_Release(&Value, val); }

    // Most libraries (TBB and Joshua Scholar's) do not do a Load_Acquire
    // here, since most algorithms do not require atomic loads. Needs some research.
    inline operator T() const { return Value; }

    // *** Standard Atomic inlines
    inline T     Exchange_Sync(T val)              { return Ops::Exchange_Sync(&Value, val); }
    inline T     Exchange_Release(T val)           { return Ops::Exchange_Release(&Value, val); }
    inline T     Exchange_Acquire(T val)           { return Ops::Exchange_Acquire(&Value, val); }
    inline T     Exchange_NoSync(T val)            { return Ops::Exchange_NoSync(&Value, val); }
    inline bool  CompareAndSet_Sync(T c, T val)    { return Ops::CompareAndSet_Sync(&Value, c, val); }
    inline bool  CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); }
    inline bool  CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Acquire(&Value, c, val); }
    inline bool  CompareAndSet_NoSync(T c, T val)  { return Ops::CompareAndSet_NoSync(&Value, c, val); }
    // Load & Store.
    inline void  Store_Release(T val)  { Ops::Store_Release(&Value, val); }
    inline T     Load_Acquire() const  { return Ops::Load_Acquire(&Value); }
};
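// Note the asymmetry above: 'operator T()' is a plain volatile read, while
// Load_Acquire() also fences. A sketch of when the distinction matters (the
// Flag/Data names are hypothetical; in practice one would use AtomicInt):
#if 0
AtomicValueBase<uint32_t> Flag;
uint32_t                  Data;

uint32_t Reader()
{
    if (Flag.Load_Acquire())  // fenced: Data written before the flag is visible here
        return Data;
    uint32_t f = Flag;        // unfenced read: no ordering guarantee for Data
    return f;
}
#endif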


// ***** AtomicPtr - Atomic pointer template

// This pointer class supports atomic assignments with release,
// increment / decrement operations, and conditional compare + set.

template<class T>
class AtomicPtr : public AtomicValueBase<T*>
{
    typedef typename AtomicValueBase<T*>::Ops Ops;

public:
    // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor.
    inline AtomicPtr() : AtomicValueBase<T*>()                   { this->Value = 0; }
    explicit inline AtomicPtr(T* val) : AtomicValueBase<T*>(val) { }

    // Pointer access.
    inline T* operator -> () const  { return this->Load_Acquire(); }

    // It is convenient to have Load_Acquire semantics here, since that suits
    // algorithms such as linked-list traversals where nodes can be added by
    // another thread.
    inline operator T* () const  { return this->Load_Acquire(); }


    // *** Standard Atomic inlines (applicable to pointers)

    // ExchangeAdd considers pointer size for pointers.
    template<class I>
    inline T*  ExchangeAdd_Sync(I incr)    { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T*  ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T*  ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T*  ExchangeAdd_NoSync(I incr)  { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); }

    // *** Atomic Operators

    inline T* operator = (T* val)  { this->Store_Release(val); return val; }

    template<class I>
    inline T* operator += (I val)  { return ExchangeAdd_Sync(val) + val; }
    template<class I>
    inline T* operator -= (I val)  { return operator += (-val); }

    inline T* operator ++ ()     { return ExchangeAdd_Sync(1) + 1; }
    inline T* operator -- ()     { return ExchangeAdd_Sync(-1) - 1; }
    inline T* operator ++ (int)  { return ExchangeAdd_Sync(1); }
    inline T* operator -- (int)  { return ExchangeAdd_Sync(-1); }
};
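// A classic AtomicPtr use: pushing onto a singly-linked list that other threads
// may be traversing or pushing to concurrently. A sketch (the Node/ListHead/
// Push names are hypothetical):
#if 0
struct Node { Node* Next; int Value; };

AtomicPtr<Node> ListHead;   // default constructor initializes to null

void Push(Node* n)
{
    Node* head;
    do {
        head = ListHead;    // Load_Acquire via operator T*()
        n->Next = head;     // link the node before publishing it
    } while (!ListHead.CompareAndSet_Release(head, n));  // publish atomically
}
#endif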


// ***** AtomicInt - Atomic integer template

// Implements an atomic integer type; the exact type to use is provided
// as an argument. Supports atomic Acquire / Release semantics, atomic
// arithmetic operations, and atomic conditional compare + set.

template<class T>
class AtomicInt : public AtomicValueBase<T>
{
    typedef typename AtomicValueBase<T>::Ops Ops;

public:
    inline AtomicInt() : AtomicValueBase<T>()                  { }
    explicit inline AtomicInt(T val) : AtomicValueBase<T>(val) { }


    // *** Standard Atomic inlines (applicable to int)
    inline T  ExchangeAdd_Sync(T val)    { return Ops::ExchangeAdd_Sync(&this->Value, val); }
    inline T  ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); }
    inline T  ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); }
    inline T  ExchangeAdd_NoSync(T val)  { return Ops::ExchangeAdd_NoSync(&this->Value, val); }
    // These increments could be more efficient because they don't return a value.
    inline void  Increment_Sync()    { ExchangeAdd_Sync((T)1); }
    inline void  Increment_Release() { ExchangeAdd_Release((T)1); }
    inline void  Increment_Acquire() { ExchangeAdd_Acquire((T)1); }
    inline void  Increment_NoSync()  { ExchangeAdd_NoSync((T)1); }

    // *** Atomic Operators

    inline T operator =  (T val)  { this->Store_Release(val); return val; }
    inline T operator += (T val)  { return ExchangeAdd_Sync(val) + val; }
    inline T operator -= (T val)  { return ExchangeAdd_Sync(0 - val) - val; }

    inline T operator ++ ()     { return ExchangeAdd_Sync((T)1) + 1; }
    inline T operator -- ()     { return ExchangeAdd_Sync(((T)0)-1) - 1; }
    inline T operator ++ (int)  { return ExchangeAdd_Sync((T)1); }
    inline T operator -- (int)  { return ExchangeAdd_Sync(((T)0)-1); }

    // More complex atomic operations. Leave it to the compiler whether to optimize them or not.
    T operator &= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp & arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator |= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp | arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator ^= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp ^ arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator *= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp * arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator /= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp / arg;
        } while(!this->CompareAndSet_Sync(comp, newVal)); // 'this->' required for dependent-base lookup
        return newVal;
    }

    T operator >>= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp >> bits;
        } while(!this->CompareAndSet_Sync(comp, newVal)); // 'this->' required for dependent-base lookup
        return newVal;
    }

    T operator <<= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp << bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }
};
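// A typical AtomicInt use: intrusive reference counting, where the increment
// needs no fence but the final decrement should release this thread's writes.
// A sketch (the RefCountedThing name is hypothetical; a fully general refcount
// would also acquire before deleting):
#if 0
class RefCountedThing
{
    AtomicInt<int> RefCount;
public:
    RefCountedThing() : RefCount(1) { }
    void AddRef()  { RefCount.Increment_NoSync(); }
    void Release()
    {
        if (RefCount.ExchangeAdd_Release(-1) == 1)  // old value 1: last reference dropped
            delete this;
    }
};
#endif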


//-----------------------------------------------------------------------------------
// ***** Lock

// Lock is the simplest and most efficient mutual-exclusion lock class.
// Unlike Mutex, it cannot be waited on.

class Lock
{
    // NOTE: Locks are not allocatable and they themselves should not allocate
    // memory by standard means. This is the case because StandardAllocator
    // relies on this class.
    // Make 'delete' private. Don't do this for 'new' since it can be redefined.
    void operator delete(void*) {}


    // *** Lock implementation for various platforms.

#if !defined(OVR_ENABLE_THREADS)

public:
    // With no thread support, lock does nothing.
    inline Lock() { }
    inline Lock(unsigned) { }
    inline ~Lock() { }
    inline void DoLock() { }
    inline void Unlock() { }

    // Windows.
#elif defined(OVR_OS_MS)

    CRITICAL_SECTION cs;
public:
    Lock(unsigned spinCount = 10000);  // Mutexes with non-zero spin counts usually result in better performance.
    ~Lock();
    // Locking functions.
    inline void DoLock()  { ::EnterCriticalSection(&cs); }
    inline void Unlock()  { ::LeaveCriticalSection(&cs); }

#else
    pthread_mutex_t mutex;

public:
    static pthread_mutexattr_t RecursiveAttr;
    static bool                RecursiveAttrInit;

    Lock(unsigned spinCount = 0)  // To do: Support spin count, probably via a custom lock implementation.
    {
        OVR_UNUSED(spinCount);
        if (!RecursiveAttrInit)
        {
            pthread_mutexattr_init(&RecursiveAttr);
            pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE);
            RecursiveAttrInit = 1;
        }
        pthread_mutex_init(&mutex, &RecursiveAttr);
    }
    ~Lock()               { pthread_mutex_destroy(&mutex); }
    inline void DoLock()  { pthread_mutex_lock(&mutex); }
    inline void Unlock()  { pthread_mutex_unlock(&mutex); }

#endif // OVR_ENABLE_THREADS


public:
    // Locker class, used for automatic locking
    class Locker
    {
    public:
        Lock *pLock;
        inline Locker(Lock *plock)
        { pLock = plock; pLock->DoLock(); }
        inline ~Locker()
        { pLock->Unlock(); }
    };
};
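// Typical Lock usage is through the scoped Locker, so the lock is released on
// every exit path, including early returns. A sketch (the QueueLock/Items/
// AddItem names are hypothetical):
#if 0
Lock QueueLock;
int  Items[16];
int  Count = 0;

void AddItem(int item)
{
    Lock::Locker scope(&QueueLock);  // DoLock() here
    if (Count < 16)
        Items[Count++] = item;
}                                    // Unlock() in ~Locker, on any return path
#endif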


//-------------------------------------------------------------------------------------
// Globally shared Lock implementation used for MessageHandlers, etc.

class SharedLock
{
public:
    SharedLock() : UseCount(0) {}

    Lock* GetLockAddRef();
    void  ReleaseLock(Lock* plock);

private:
    Lock* toLock() { return (Lock*)Buffer; }

    // UseCount and max alignment.
    volatile int UseCount;
    uint64_t     Buffer[(sizeof(Lock)+sizeof(uint64_t)-1)/sizeof(uint64_t)];
};


} // OVR

#endif