/************************************************************************************

PublicHeader:   OVR_Kernel.h
Filename    :   OVR_Atomic.h
Content     :   Contains atomic operations and inline fastest locking
                functionality. Will contain #ifdefs for OS efficiency.
                Have non-thread-safe implementation if not available.
Created     :   September 19, 2012
Notes       :

Copyright   :   Copyright 2014 Oculus VR, LLC All Rights reserved.

Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License");
you may not use the Oculus VR Rift SDK except in compliance with the License,
which is provided at the time of installation or download, or which
otherwise accompanies this software in either electronic or hard copy form.

You may obtain a copy of the License at

http://www.oculusvr.com/licenses/LICENSE-3.2

Unless required by applicable law or agreed to in writing, the Oculus VR SDK
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

************************************************************************************/

#ifndef OVR_Atomic_h
#define OVR_Atomic_h

#include "OVR_Types.h"

// Include System thread functionality.
#if defined(OVR_OS_MS) && !defined(OVR_OS_MS_MOBILE)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <Windows.h>
#else
#include <pthread.h>
#endif

#ifdef OVR_CC_MSVC
#include <intrin.h>
#pragma intrinsic(_ReadBarrier, _WriteBarrier, _ReadWriteBarrier)
#endif

namespace OVR {


// ****** Declared classes

// If there is NO thread support we implement AtomicOps and
// Lock objects as no-ops. The other classes are not defined.
template<class C> class AtomicOps;
template<class T> class AtomicInt;
template<class T> class AtomicPtr;

class Lock;


//-----------------------------------------------------------------------------------
// ***** AtomicOps

// Atomic operations are provided by the AtomicOps template class,
// implemented through system-specific AtomicOpsRaw specializations.
// It provides several fundamental operations such as Exchange, ExchangeAdd,
// CompareAndSet, and Store_Release. Each function includes several memory
// synchronization versions, important for multiprocessing CPUs with weak
// memory consistency. The following memory fencing strategies are supported:
//
//  - NoSync.  No memory synchronization is done for atomic op.
//  - Release. All other memory writes are completed before atomic op
//             writes its results.
//  - Acquire. Further memory reads are forced to wait until atomic op
//             executes, guaranteeing that the right values will be seen.
//  - Sync.    A combination of Release and Acquire.
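//
// Illustrative usage sketch (not part of this header's API; Data, Flag, Produce,
// and TryConsume are hypothetical names): a minimal producer/consumer hand-off
// built on the Release/Acquire variants described above.
//
//     volatile int Data;   // payload written by the producer
//     volatile int Flag;   // 0 = empty, 1 = ready
//
//     void Produce(int value)
//     {
//         Data = value;
//         // Release: the write to Data completes before Flag is published.
//         AtomicOps<int>::Store_Release(&Flag, 1);
//     }
//
//     bool TryConsume(int* out)
//     {
//         // Acquire: if Flag reads as 1, the preceding write to Data is visible.
//         if (AtomicOps<int>::Load_Acquire(&Flag) == 1)
//         {
//             *out = Data;
//             return true;
//         }
//         return false;
//     }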


// *** AtomicOpsRaw

// AtomicOpsRaw is a specialized template that provides atomic operations
// used by AtomicOps. This class has two fundamental qualities: (1) it
// defines a type T of correct size, and (2) provides operations that work
// atomically, such as Exchange_Sync and CompareAndSet_Release.

// AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw.
// The primary thing it does is define sync class objects, whose destructor and
// constructor provide places to insert appropriate synchronization calls, on
// systems where such calls are necessary. So far, the breakdown is as follows:
//
//  - X86 systems don't need custom syncs, since their exchange/atomic
//    instructions are implicitly synchronized.
//  - PowerPC requires lwsync/isync instructions that can use this mechanism.
//  - If some other systems require a mechanism where syncing type is associated
//    with a particular instruction, the default implementation (which implements
//    all Sync, Acquire, and Release modes in terms of NoSync and fence) may not
//    work. In that case it will need to be #ifdef-ed conditionally.

struct AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_CPU_X86_64)
    // Need to have empty constructor to avoid class 'unused' variable warning.
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_MIPS)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_ARM) // Includes Android and iOS.
    struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } };

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4)
    // __sync functions are already full sync
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };
#endif
};


// 4-Byte raw data atomic op implementation class.
struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint32_t T;

    // *** Thread-Safe Atomic Versions.

#elif defined(OVR_OS_MS)

    // Use special defines for VC6, where 'volatile' is not used and
    // InterlockedCompareExchange is declared incorrectly.
    typedef LONG T;
#if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300)
    typedef T* InterlockTPtr;
    typedef LPVOID ET;
    typedef ET* InterlockETPtr;
#else
    typedef volatile T* InterlockTPtr;
    typedef T ET;
    typedef InterlockTPtr InterlockETPtr;
#endif
    inline static T    Exchange_NoSync(volatile T* p, T val)            { return InterlockedExchange((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)         { return InterlockedExchangeAdd((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val)  { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; }

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    typedef uint32_t T;
    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "stwcx. %[j],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t dummy, ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stwcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stwcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CPU_MIPS)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll  %[r],0(%[i])\n\t"
                     "sc  %[j],0(%[i])\n\t"
                     "beq %[j],$0,1b\n\t"
                     "nop \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll   %[r],0(%[i])\n\t"
                     "addu %[j],%[r],%[j]\n\t"
                     "sc   %[j],0(%[i])\n\t"
                     "beq  %[j],$0,1b\n\t"
                     "nop  \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "move %[r],$0\n\t"
                     "ll   %[o],0(%[i])\n\t"
                     "bne  %[o],%[c],2f\n\t"
                     "move %[r],%[v]\n\t"
                     "sc   %[r],0(%[i])\n\t"
                     "beq  %[r],$0,1b\n\t"
                     "nop  \n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value)
                     : "cc", "memory");

        return ret;
    }

#elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(j, i) == 0)
                return r;
        }
    }
    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(r + j, i) == 0)
                return r;
        }
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (r != c)
                return 0;
            if (__strex(value, i) == 0)
                return 1;
        }
    }

#elif defined(OVR_CPU_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "strex %[t],%[j],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "add   %[o],%[r],%[j]\n\t"
                     "strex %[t],%[o],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret = 1, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[o],[%[i]]\n\t"
                     "cmp   %[o],%[c]\n\t"
                     "bne   2f\n\t"
                     "strex %[r],%[v],[%[i]]\n\t"
                     "cmp   %[r],#0\n\t"
                     "bne   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value)
                     : "cc", "memory");

        return !ret;
    }

#elif defined(OVR_CPU_X86)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("xchgl %1,%[i]\n"
                     : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory");

        return j;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("lock; xaddl %1,%[i]\n"
                     : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory");

        return j;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("lock; cmpxchgl %[v],%[i]\n"
                     : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory");

        return (ret == c);
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint32_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};


// 8-Byte raw data atomic op implementation class.
// Currently an implementation is provided only on systems with 64-bit pointers.
struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint64_t T;

    // *** Thread-Safe OS specific versions.
#elif defined(OVR_OS_MS)

    // This is only for 64-bit systems.
    typedef LONG64 T;
    typedef volatile T* InterlockTPtr;
    inline static T    Exchange_NoSync(volatile T* p, T val)            { return InterlockedExchange64((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)         { return InterlockedExchangeAdd64((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val)  { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; }

#elif defined(OVR_CPU_PPC64)

    typedef uint64_t T;

    static inline uint64_t Exchange_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "mr     %[o],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline uint64_t ExchangeAdd_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint64_t *i, uint64_t c, uint64_t value)
    {
        uint64_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stdcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint64_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};


// Default implementation for AtomicOpsRaw; provides implementation of mem-fenced
// atomic operations where fencing is done with a sync object wrapped around a NoSync
// operation implemented in the base class. If such an implementation is not possible
// on a given platform, #ifdefs can be used to disable it and then op functions can be
// implemented individually in the appropriate AtomicOpsRaw class.
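//
// Conceptual sketch of the wrapping pattern used below (not additional API): each
// fenced op constructs a sync object whose constructor/destructor emit whatever
// barriers the platform needs around the unfenced operation, e.g.
//
//     T Exchange_Sync(volatile T* p, T val)
//     {
//         FullSync fence;                  // ctor: pre-barrier; dtor: post-barrier
//         return Exchange_NoSync(p, val);  // raw atomic from the base class
//     }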

template<class O>
struct AtomicOpsRaw_DefImpl : public O
{
    typedef typename O::T           O_T;
    typedef typename O::FullSync    O_FullSync;
    typedef typename O::AcquireSync O_AcquireSync;
    typedef typename O::ReleaseSync O_ReleaseSync;

    // If there is no thread support, provide the default implementation. In this case,
    // the base class (O) must still provide the T declaration.
#ifndef OVR_ENABLE_THREADS

    // Atomic exchange of val with argument. Returns old val.
    inline static O_T Exchange_NoSync(volatile O_T* p, O_T val)     { O_T old = *p; *p = val; return old; }
    // Adds a new val to argument; returns its old val.
    inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val)  { O_T old = *p; *p += val; return old; }
    // Compares the argument data with 'c' val.
    // If it succeeds, stores val into '*p' and returns true; otherwise returns false.
    inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p == c) { *p = val; return 1; } return 0; }

#endif

    // If a NoSync-wrapped implementation is not possible on a platform, this block
    // should be replaced with per-function implementations in O.
    // Note the "AtomicOpsRaw_DefImpl<O>::" prefix in calls below.
    inline static O_T Exchange_Sync(volatile O_T* p, O_T val)       { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T Exchange_Release(volatile O_T* p, O_T val)    { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T Exchange_Acquire(volatile O_T* p, O_T val)    { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T ExchangeAdd_Sync(volatile O_T* p, O_T val)    { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T ExchangeAdd_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T ExchangeAdd_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val)    { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }

    // Loads and stores with memory fence. These have only the relevant versions.
#ifdef OVR_CPU_X86
    // On X86, Store_Release is implemented as exchange. Note that we can also
    // consider 'sfence' in the future, although it is not as compatible with older CPUs.
    inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); }
#else
    inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; }
#endif
    inline static O_T Load_Acquire(const volatile O_T* p)
    {
        O_AcquireSync sync;
        OVR_UNUSED(sync);

#if defined(OVR_CC_MSVC)
        _ReadBarrier(); // Compiler fence and load barrier
#elif defined(OVR_CC_INTEL)
        __memory_barrier(); // Compiler fence
#else
        // GCC-compatible:
        asm volatile ("" : : : "memory"); // Compiler fence
#endif

        return *p;
    }
};


template<int size>
struct AtomicOpsRaw : public AtomicOpsRawBase { };

template<>
struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>
{
    // Ensure that assigned type size is correct.
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>::T) == 4); }
};
template<>
struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>
{
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>::T) == 8); }
};


// *** AtomicOps - implementation of atomic Ops for specified class

// Implements atomic ops on a class, provided that the object is either
// 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations
// available). Relies on AtomicOpsRaw for much of implementation.

template<class C>
class AtomicOps
{
    typedef AtomicOpsRaw<sizeof(C)>   Ops;
    typedef typename Ops::T           T;
    typedef volatile typename Ops::T* PT;
    // We cast through unions to (1) avoid pointer size compiler warnings
    // and (2) ensure that there are no problems with strict pointer aliasing.
    union C2T_union { C c; T t; };

public:
    // General purpose implementation for standard syncs.
    inline static C Exchange_Sync(volatile C* p, C val)       { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; }
    inline static C Exchange_Release(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; }
    inline static C Exchange_Acquire(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; }
    inline static C Exchange_NoSync(volatile C* p, C val)     { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Sync(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_NoSync(volatile C* p, C val)  { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; }
    inline static bool CompareAndSet_Sync(volatile C* p, C c, C val)    { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Release(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Acquire(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val)  { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); }

    // Loads and stores with memory fence. These have only the relevant versions.
    inline static void Store_Release(volatile C* p, C val)  { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); }
    inline static C    Load_Acquire(const volatile C* p)    { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; }

    // Deprecated misspelling, kept for backward compatibility (delegates to Acquire):
    inline static bool CompareAndSet_Relse(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
};



// Atomic value base class - implements operations shared for integers and pointers.
template<class T>
class AtomicValueBase
{
protected:
    typedef AtomicOps<T> Ops;
public:

    volatile T Value;

    inline AtomicValueBase()               { }
    explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); }

    // Most libraries (TBB and Joshua Scholar's) do not do Load_Acquire
    // here, since most algorithms do not require atomic loads. Needs some research.
    inline operator T() const { return Value; }

    // *** Standard Atomic inlines
    inline T    Exchange_Sync(T val)              { return Ops::Exchange_Sync(&Value, val); }
    inline T    Exchange_Release(T val)           { return Ops::Exchange_Release(&Value, val); }
    inline T    Exchange_Acquire(T val)           { return Ops::Exchange_Acquire(&Value, val); }
    inline T    Exchange_NoSync(T val)            { return Ops::Exchange_NoSync(&Value, val); }
    inline bool CompareAndSet_Sync(T c, T val)    { return Ops::CompareAndSet_Sync(&Value, c, val); }
    inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); }
    inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Acquire(&Value, c, val); }
    inline bool CompareAndSet_NoSync(T c, T val)  { return Ops::CompareAndSet_NoSync(&Value, c, val); }
    // Load & Store.
    inline void Store_Release(T val)  { Ops::Store_Release(&Value, val); }
    inline T    Load_Acquire() const  { return Ops::Load_Acquire(&Value); }
};


// ***** AtomicPtr - Atomic pointer template

// This pointer class supports atomic assignments with release,
// increment / decrement operations, and conditional compare + set.

template<class T>
class AtomicPtr : public AtomicValueBase<T*>
{
    typedef typename AtomicValueBase<T*>::Ops Ops;

public:
    // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor.
    inline AtomicPtr() : AtomicValueBase<T*>()                   { this->Value = 0; }
    explicit inline AtomicPtr(T* val) : AtomicValueBase<T*>(val) { }

    // Pointer access.
    inline T* operator -> () const { return this->Load_Acquire(); }

    // It is convenient for this to have Load_Acquire characteristics,
    // since that suits algorithms such as linked-list traversals where
    // another thread may be appending to the list concurrently.
    inline operator T* () const { return this->Load_Acquire(); }


    // *** Standard Atomic inlines (applicable to pointers)

    // ExchangeAdd considers pointer size for pointers.
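    // For example (a sketch, not additional API): since the increment is applied
    // via pointer arithmetic, ((T*)0) + incr scales by sizeof(T), so
    // AtomicPtr<uint32_t> p; p.ExchangeAdd_Sync(2); advances the stored address
    // by 8 bytes, mirroring ordinary ptr += 2 semantics.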
    template<class I>
    inline T* ExchangeAdd_Sync(I incr)    { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_NoSync(I incr)  { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); }

    // *** Atomic Operators

    inline T* operator = (T* val) { this->Store_Release(val); return val; }

    template<class I>
    inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; }
    template<class I>
    inline T* operator -= (I val) { return operator += (-val); }

    inline T* operator ++ ()    { return ExchangeAdd_Sync(1) + 1; }
    inline T* operator -- ()    { return ExchangeAdd_Sync(-1) - 1; }
    inline T* operator ++ (int) { return ExchangeAdd_Sync(1); }
    inline T* operator -- (int) { return ExchangeAdd_Sync(-1); }
};


// ***** AtomicInt - Atomic integer template

// Implements an atomic integer type; the exact type to use is provided
// as an argument. Supports atomic Acquire / Release semantics, atomic
// arithmetic operations, and atomic conditional compare + set.

template<class T>
class AtomicInt : public AtomicValueBase<T>
{
    typedef typename AtomicValueBase<T>::Ops Ops;

public:
    inline AtomicInt() : AtomicValueBase<T>()                  { }
    explicit inline AtomicInt(T val) : AtomicValueBase<T>(val) { }


    // *** Standard Atomic inlines (applicable to int)
    inline T ExchangeAdd_Sync(T val)    { return Ops::ExchangeAdd_Sync(&this->Value, val); }
    inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); }
    inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); }
    inline T ExchangeAdd_NoSync(T val)  { return Ops::ExchangeAdd_NoSync(&this->Value, val); }
    // These increments could be more efficient because they don't return a value.
    inline void Increment_Sync()    { ExchangeAdd_Sync((T)1); }
    inline void Increment_Release() { ExchangeAdd_Release((T)1); }
    inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); }
    inline void Increment_NoSync()  { ExchangeAdd_NoSync((T)1); }

    // *** Atomic Operators

    inline T operator = (T val)  { this->Store_Release(val); return val; }
    inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; }
    inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; }

    inline T operator ++ ()    { return ExchangeAdd_Sync((T)1) + 1; }
    inline T operator -- ()    { return ExchangeAdd_Sync(((T)0)-1) - 1; }
    inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); }
    inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); }

    // More complex atomic operations, implemented as compare-and-set retry loops.
    // Leave it to the compiler whether to optimize them or not.
    T operator &= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp & arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator |= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp | arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator ^= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp ^ arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator *= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp * arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator /= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp / arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator >>= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp >> bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator <<= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp << bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }
};


//-----------------------------------------------------------------------------------
// ***** Lock

// Lock is the simplest and most efficient mutual-exclusion lock class.
// Unlike Mutex, it cannot be waited on.

class Lock
{
    // NOTE: Locks are not allocatable and they themselves should not allocate
    // memory by standard means. This is the case because StandardAllocator
    // relies on this class.
    // Make 'delete' private. Don't do this for 'new' since it can be redefined.
    void operator delete(void*) {}


    // *** Lock implementation for various platforms.

#if !defined(OVR_ENABLE_THREADS)

public:
    // With no thread support, lock does nothing.
    inline Lock() { }
    inline Lock(unsigned) { }
    inline ~Lock() { }
    inline void DoLock() { }
    inline void Unlock() { }

    // Windows.
#elif defined(OVR_OS_MS)

    CRITICAL_SECTION cs;
public:
    Lock(unsigned spinCount = 10000); // Mutexes with non-zero spin counts usually result in better performance.
    ~Lock();
    // Locking functions.
    inline void DoLock()  { ::EnterCriticalSection(&cs); }
    inline void Unlock()  { ::LeaveCriticalSection(&cs); }

#else
    pthread_mutex_t mutex;

public:
    static pthread_mutexattr_t RecursiveAttr;
    static bool                RecursiveAttrInit;

    Lock (unsigned spinCount = 0)   // To do: Support spin count, probably via a custom lock implementation.
    {
        OVR_UNUSED(spinCount);
        if (!RecursiveAttrInit)
        {
            pthread_mutexattr_init(&RecursiveAttr);
            pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE);
            RecursiveAttrInit = 1;
        }
        pthread_mutex_init(&mutex, &RecursiveAttr);
    }
    ~Lock ()             { pthread_mutex_destroy(&mutex); }
    inline void DoLock() { pthread_mutex_lock(&mutex); }
    inline void Unlock() { pthread_mutex_unlock(&mutex); }

#endif // OVR_ENABLE_THREADS


public:
    // Locker class, used for automatic locking
    class Locker
    {
    public:
        Lock *pLock;
        inline Locker(Lock *plock)
        { pLock = plock; pLock->DoLock(); }
        inline ~Locker()
        { pLock->Unlock(); }
    };
};


//-------------------------------------------------------------------------------------
// Globally shared Lock implementation used for MessageHandlers, etc.

class SharedLock
{
public:
    SharedLock() : UseCount(0) {}

    Lock* GetLockAddRef();
    void  ReleaseLock(Lock* plock);

private:
    Lock* toLock() { return (Lock*)Buffer; }

    // UseCount and max alignment.
    volatile int UseCount;
    uint64_t     Buffer[(sizeof(Lock) + sizeof(uint64_t) - 1) / sizeof(uint64_t)];
};


} // OVR

#endif
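
// Illustrative usage sketch (comment only; Node, LinkIntoList, and the globals
// are hypothetical names): scoped locking via Lock::Locker combined with an
// AtomicInt counter.
//
//     static OVR::Lock           ListLock;
//     static OVR::AtomicInt<int> NodeCount;
//
//     void AddNode(Node* n)
//     {
//         OVR::Lock::Locker scope(&ListLock); // DoLock() now, Unlock() on scope exit
//         LinkIntoList(n);                    // hypothetical helper
//         ++NodeCount;                        // atomic increment via ExchangeAdd_Sync
//     }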