nuclear@3: /************************************************************************************ nuclear@3: nuclear@3: PublicHeader: OVR.h nuclear@3: Filename : OVR_Atomic.h nuclear@3: Content : Contains atomic operations and inline fastest locking nuclear@3: functionality. Will contain #ifdefs for OS efficiency. nuclear@3: Have non-thread-safe implementaion if not available. nuclear@3: Created : September 19, 2012 nuclear@3: Notes : nuclear@3: nuclear@3: Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved. nuclear@3: nuclear@3: Use of this software is subject to the terms of the Oculus license nuclear@3: agreement provided at the time of installation or download, or which nuclear@3: otherwise accompanies this software in either electronic or hard copy form. nuclear@3: nuclear@3: ************************************************************************************/ nuclear@3: #ifndef OVR_Atomic_h nuclear@3: #define OVR_Atomic_h nuclear@3: nuclear@3: #include "OVR_Types.h" nuclear@3: nuclear@3: // Include System thread functionality. nuclear@3: #if defined(OVR_OS_WIN32) nuclear@3: #include nuclear@3: #else nuclear@3: #include nuclear@3: #endif nuclear@3: nuclear@3: nuclear@3: namespace OVR { nuclear@3: nuclear@3: nuclear@3: // ****** Declared classes nuclear@3: nuclear@3: // If there is NO thread support we implement AtomicOps and nuclear@3: // Lock objects as no-ops. The other classes are not defined. nuclear@3: template class AtomicOps; nuclear@3: template class AtomicInt; nuclear@3: template class AtomicPtr; nuclear@3: nuclear@3: class Lock; nuclear@3: nuclear@3: nuclear@3: //----------------------------------------------------------------------------------- nuclear@3: // ***** AtomicOps nuclear@3: nuclear@3: // Atomic operations are provided by the AtomicOps templates class, nuclear@3: // implemented through system-specific AtomicOpsRaw specializations. nuclear@3: // It provides several fundamental operations such as Exchange, ExchangeAdd nuclear@3: // CompareAndSet, and Store_Release. Each function includes several memory nuclear@3: // synchronization versions, important for multiprocessing CPUs with weak nuclear@3: // memory consistency. The following memory fencing strategies are supported: nuclear@3: // nuclear@3: // - NoSync. No memory synchronization is done for atomic op. nuclear@3: // - Release. All other memory writes are completed before atomic op nuclear@3: // writes its results. nuclear@3: // - Acquire. Further memory reads are forced to wait until atomic op nuclear@3: // executes, guaranteeing that the right values will be seen. nuclear@3: // - Sync. A combination of Release and Acquire. nuclear@3: nuclear@3: nuclear@3: // *** AtomicOpsRaw nuclear@3: nuclear@3: // AtomicOpsRaw is a specialized template that provides atomic operations nuclear@3: // used by AtomicOps. This class has two fundamental qualities: (1) it nuclear@3: // defines a type T of correct size, and (2) provides operations that work nuclear@3: // atomically, such as Exchange_Sync and CompareAndSet_Release. nuclear@3: nuclear@3: // AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw. nuclear@3: // The primary thing is does is define sync class objects, whose destructor and nuclear@3: // constructor provide places to insert appropriate synchronization calls, on nuclear@3: // systems where such calls are necessary. So far, the breakdown is as follows: nuclear@3: // nuclear@3: // - X86 systems don't need custom syncs, since their exchange/atomic nuclear@3: // instructions are implicitly synchronized. nuclear@3: // - PowerPC requires lwsync/isync instructions that can use this mechanism. nuclear@3: // - If some other systems require a mechanism where syncing type is associated nuclear@3: // with a particular instruction, the default implementation (which implements nuclear@3: // all Sync, Acquire, and Release modes in terms of NoSync and fence) may not nuclear@3: // work. Ii that case it will need to be #ifdef-ed conditionally. nuclear@3: nuclear@3: struct AtomicOpsRawBase nuclear@3: { nuclear@3: #if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_OS_WIN32) || defined(OVR_OS_IPHONE) nuclear@3: // Need to have empty constructor to avoid class 'unused' variable warning. nuclear@3: struct FullSync { inline FullSync() { } }; nuclear@3: struct AcquireSync { inline AcquireSync() { } }; nuclear@3: struct ReleaseSync { inline ReleaseSync() { } }; nuclear@3: nuclear@3: #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) nuclear@3: struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } }; nuclear@3: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } }; nuclear@3: struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; nuclear@3: nuclear@3: #elif defined(OVR_CPU_MIPS) nuclear@3: struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } }; nuclear@3: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } }; nuclear@3: struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; nuclear@3: nuclear@3: #elif defined(OVR_CPU_ARM) nuclear@3: struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } }; nuclear@3: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } }; nuclear@3: struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } }; nuclear@3: nuclear@3: nuclear@3: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4) nuclear@3: // __sync functions are already full sync nuclear@3: struct FullSync { inline FullSync() { } }; nuclear@3: struct AcquireSync { inline AcquireSync() { } }; nuclear@3: struct ReleaseSync { inline ReleaseSync() { } }; nuclear@3: #endif nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // 4-Byte raw data atomic op implementation class. nuclear@3: struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase nuclear@3: { nuclear@3: #if !defined(OVR_ENABLE_THREADS) nuclear@3: nuclear@3: // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: // *** Thread - Safe Atomic Versions. nuclear@3: nuclear@3: #elif defined(OVR_OS_WIN32) nuclear@3: nuclear@3: // Use special defined for VC6, where volatile is not used and nuclear@3: // InterlockedCompareExchange is declared incorrectly. nuclear@3: typedef LONG T; nuclear@3: #if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300) nuclear@3: typedef T* InterlockTPtr; nuclear@3: typedef LPVOID ET; nuclear@3: typedef ET* InterlockETPtr; nuclear@3: #else nuclear@3: typedef volatile T* InterlockTPtr; nuclear@3: typedef T ET; nuclear@3: typedef InterlockTPtr InterlockETPtr; nuclear@3: #endif nuclear@3: inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange((InterlockTPtr)p, val); } nuclear@3: inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd((InterlockTPtr)p, val); } nuclear@3: inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; } nuclear@3: nuclear@3: #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) nuclear@3: typedef UInt32 T; nuclear@3: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "lwarx %[r],0,%[i]\n\t" nuclear@3: "stwcx. %[j],0,%[i]\n\t" nuclear@3: "bne- 1b\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 dummy, ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "lwarx %[r],0,%[i]\n\t" nuclear@3: "add %[o],%[r],%[j]\n\t" nuclear@3: "stwcx. %[o],0,%[i]\n\t" nuclear@3: "bne- 1b\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@3: { nuclear@3: UInt32 ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "lwarx %[r],0,%[i]\n\t" nuclear@3: "cmpw 0,%[r],%[cmp]\n\t" nuclear@3: "mfcr %[r]\n\t" nuclear@3: "bne- 2f\n\t" nuclear@3: "stwcx. %[val],0,%[i]\n\t" nuclear@3: "bne- 1b\n\t" nuclear@3: "2:\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory"); nuclear@3: nuclear@3: return (ret & 0x20000000) ? 1 : 0; nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CPU_MIPS) nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ll %[r],0(%[i])\n\t" nuclear@3: "sc %[j],0(%[i])\n\t" nuclear@3: "beq %[j],$0,1b\n\t" nuclear@3: "nop \n" nuclear@3: : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ll %[r],0(%[i])\n\t" nuclear@3: "addu %[j],%[r],%[j]\n\t" nuclear@3: "sc %[j],0(%[i])\n\t" nuclear@3: "beq %[j],$0,1b\n\t" nuclear@3: "nop \n" nuclear@3: : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@3: { nuclear@3: UInt32 ret, dummy; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "move %[r],$0\n\t" nuclear@3: "ll %[o],0(%[i])\n\t" nuclear@3: "bne %[o],%[c],2f\n\t" nuclear@3: "move %[r],%[v]\n\t" nuclear@3: "sc %[r],0(%[i])\n\t" nuclear@3: "beq %[r],$0,1b\n\t" nuclear@3: "nop \n\t" nuclear@3: "2:\n" nuclear@3: : "+m" (*i),[r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value) nuclear@3: : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM) nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: for(;;) nuclear@3: { nuclear@3: T r = __ldrex(i); nuclear@3: if (__strex(j, i) == 0) nuclear@3: return r; nuclear@3: } nuclear@3: } nuclear@3: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: for(;;) nuclear@3: { nuclear@3: T r = __ldrex(i); nuclear@3: if (__strex(r + j, i) == 0) nuclear@3: return r; nuclear@3: } nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@3: { nuclear@3: for(;;) nuclear@3: { nuclear@3: T r = __ldrex(i); nuclear@3: if (r != c) nuclear@3: return 0; nuclear@3: if (__strex(value, i) == 0) nuclear@3: return 1; nuclear@3: } nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CPU_ARM) nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 ret, dummy; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldrex %[r],[%[i]]\n\t" nuclear@3: "strex %[t],%[j],[%[i]]\n\t" nuclear@3: "cmp %[t],#0\n\t" nuclear@3: "bne 1b\n\t" nuclear@3: : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: UInt32 ret, dummy, test; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldrex %[r],[%[i]]\n\t" nuclear@3: "add %[o],%[r],%[j]\n\t" nuclear@3: "strex %[t],%[o],[%[i]]\n\t" nuclear@3: "cmp %[t],#0\n\t" nuclear@3: "bne 1b\n\t" nuclear@3: : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@3: { nuclear@3: UInt32 ret = 1, dummy, test; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldrex %[o],[%[i]]\n\t" nuclear@3: "cmp %[o],%[c]\n\t" nuclear@3: "bne 2f\n\t" nuclear@3: "strex %[r],%[v],[%[i]]\n\t" nuclear@3: "cmp %[r],#0\n\t" nuclear@3: "bne 1b\n\t" nuclear@3: "2:\n" nuclear@3: : "+m" (*i),[r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value) nuclear@3: : "cc", "memory"); nuclear@3: nuclear@3: return !ret; nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CPU_X86) nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: asm volatile("xchgl %1,%[i]\n" nuclear@3: : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory"); nuclear@3: nuclear@3: return j; nuclear@3: } nuclear@3: nuclear@3: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@3: { nuclear@3: asm volatile("lock; xaddl %1,%[i]\n" nuclear@3: : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory"); nuclear@3: nuclear@3: return j; nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@3: { nuclear@3: UInt32 ret; nuclear@3: nuclear@3: asm volatile("lock; cmpxchgl %[v],%[i]\n" nuclear@3: : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory"); nuclear@3: nuclear@3: return (ret == c); nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) nuclear@3: nuclear@3: typedef UInt32 T; nuclear@3: nuclear@3: static inline T Exchange_NoSync(volatile T *i, T j) nuclear@3: { nuclear@3: T v; nuclear@3: do { nuclear@3: v = *i; nuclear@3: } while (!__sync_bool_compare_and_swap(i, v, j)); nuclear@3: return v; nuclear@3: } nuclear@3: nuclear@3: static inline T ExchangeAdd_NoSync(volatile T *i, T j) nuclear@3: { nuclear@3: return __sync_fetch_and_add(i, j); nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) nuclear@3: { nuclear@3: return __sync_bool_compare_and_swap(i, c, value); nuclear@3: } nuclear@3: nuclear@3: #endif // OS nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // 8-Byte raw data data atomic op implementation class. nuclear@3: // Currently implementation is provided only on systems with 64-bit pointers. nuclear@3: struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase nuclear@3: { nuclear@3: #if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS) nuclear@3: nuclear@3: // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. nuclear@3: typedef UInt64 T; nuclear@3: nuclear@3: // *** Thread - Safe OS specific versions. nuclear@3: #elif defined(OVR_OS_WIN32) nuclear@3: nuclear@3: // This is only for 64-bit systems. nuclear@3: typedef LONG64 T; nuclear@3: typedef volatile T* InterlockTPtr; nuclear@3: inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange64((InterlockTPtr)p, val); } nuclear@3: inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd64((InterlockTPtr)p, val); } nuclear@3: inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; } nuclear@3: nuclear@3: #elif defined(OVR_CPU_PPC64) nuclear@3: nuclear@3: typedef UInt64 T; nuclear@3: nuclear@3: static inline UInt64 Exchange_NoSync(volatile UInt64 *i, UInt64 j) nuclear@3: { nuclear@3: UInt64 dummy, ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldarx %[r],0,%[i]\n\t" nuclear@3: "mr %[o],%[j]\n\t" nuclear@3: "stdcx. %[o],0,%[i]\n\t" nuclear@3: "bne- 1b\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline UInt64 ExchangeAdd_NoSync(volatile UInt64 *i, UInt64 j) nuclear@3: { nuclear@3: UInt64 dummy, ret; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldarx %[r],0,%[i]\n\t" nuclear@3: "add %[o],%[r],%[j]\n\t" nuclear@3: "stdcx. %[o],0,%[i]\n\t" nuclear@3: "bne- 1b\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); nuclear@3: nuclear@3: return ret; nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile UInt64 *i, UInt64 c, UInt64 value) nuclear@3: { nuclear@3: UInt64 ret, dummy; nuclear@3: nuclear@3: asm volatile("1:\n\t" nuclear@3: "ldarx %[r],0,%[i]\n\t" nuclear@3: "cmpw 0,%[r],%[cmp]\n\t" nuclear@3: "mfcr %[r]\n\t" nuclear@3: "bne- 2f\n\t" nuclear@3: "stdcx. %[val],0,%[i]\n\t" nuclear@3: "bne- 1b\n\t" nuclear@3: "2:\n" nuclear@3: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc"); nuclear@3: nuclear@3: return (ret & 0x20000000) ? 1 : 0; nuclear@3: } nuclear@3: nuclear@3: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) nuclear@3: nuclear@3: typedef UInt64 T; nuclear@3: nuclear@3: static inline T Exchange_NoSync(volatile T *i, T j) nuclear@3: { nuclear@3: T v; nuclear@3: do { nuclear@3: v = *i; nuclear@3: } while (!__sync_bool_compare_and_swap(i, v, j)); nuclear@3: return v; nuclear@3: } nuclear@3: nuclear@3: static inline T ExchangeAdd_NoSync(volatile T *i, T j) nuclear@3: { nuclear@3: return __sync_fetch_and_add(i, j); nuclear@3: } nuclear@3: nuclear@3: static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) nuclear@3: { nuclear@3: return __sync_bool_compare_and_swap(i, c, value); nuclear@3: } nuclear@3: nuclear@3: #endif // OS nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // Default implementation for AtomicOpsRaw; provides implementation of mem-fenced nuclear@3: // atomic operations where fencing is done with a sync object wrapped around a NoSync nuclear@3: // operation implemented in the base class. If such implementation is not possible nuclear@3: // on a given platform, #ifdefs can be used to disable it and then op functions can be nuclear@3: // implemented individually in the appropriate AtomicOpsRaw class. nuclear@3: nuclear@3: template nuclear@3: struct AtomicOpsRaw_DefImpl : public O nuclear@3: { nuclear@3: typedef typename O::T O_T; nuclear@3: typedef typename O::FullSync O_FullSync; nuclear@3: typedef typename O::AcquireSync O_AcquireSync; nuclear@3: typedef typename O::ReleaseSync O_ReleaseSync; nuclear@3: nuclear@3: // If there is no thread support, provide the default implementation. In this case, nuclear@3: // the base class (0) must still provide the T declaration. nuclear@3: #ifndef OVR_ENABLE_THREADS nuclear@3: nuclear@3: // Atomic exchange of val with argument. Returns old val. nuclear@3: inline static O_T Exchange_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p = val; return old; } nuclear@3: // Adds a new val to argument; returns its old val. nuclear@3: inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; } nuclear@3: // Compares the argument data with 'c' val. nuclear@3: // If succeeded, stores val int '*p' and returns true; otherwise returns false. nuclear@3: inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p==c) { *p = val; return 1; } return 0; } nuclear@3: nuclear@3: #endif nuclear@3: nuclear@3: // If NoSync wrapped implementation may not be possible, it this block should be nuclear@3: // replaced with per-function implementation in O. nuclear@3: // "AtomicOpsRaw_DefImpl::" prefix in calls below. nuclear@3: inline static O_T Exchange_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@3: inline static O_T Exchange_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@3: inline static O_T Exchange_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@3: inline static O_T ExchangeAdd_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@3: inline static O_T ExchangeAdd_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@3: inline static O_T ExchangeAdd_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@3: inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@3: inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@3: inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@3: nuclear@3: // Loads and stores with memory fence. These have only the relevant versions. nuclear@3: #ifdef OVR_CPU_X86 nuclear@3: // On X86, Store_Release is implemented as exchange. Note that we can also nuclear@3: // consider 'sfence' in the future, although it is not as compatible with older CPUs. nuclear@3: inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); } nuclear@3: #else nuclear@3: inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; } nuclear@3: #endif nuclear@3: inline static O_T Load_Acquire(const volatile O_T* p) { O_AcquireSync sync; OVR_UNUSED(sync); return *p; } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: template nuclear@3: struct AtomicOpsRaw : public AtomicOpsRawBase { }; nuclear@3: nuclear@3: template<> nuclear@3: struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl nuclear@3: { nuclear@3: // Ensure that assigned type size is correct. nuclear@3: AtomicOpsRaw() nuclear@3: { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl::T) == 4); } nuclear@3: }; nuclear@3: template<> nuclear@3: struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl nuclear@3: { nuclear@3: AtomicOpsRaw() nuclear@3: { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl::T) == 8); } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // *** AtomicOps - implementation of atomic Ops for specified class nuclear@3: nuclear@3: // Implements atomic ops on a class, provided that the object is either nuclear@3: // 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations nuclear@3: // available). Relies on AtomicOpsRaw for much of implementation. nuclear@3: nuclear@3: template nuclear@3: class AtomicOps nuclear@3: { nuclear@3: typedef AtomicOpsRaw Ops; nuclear@3: typedef typename Ops::T T; nuclear@3: typedef volatile typename Ops::T* PT; nuclear@3: // We cast through unions to (1) avoid pointer size compiler warnings nuclear@3: // and (2) ensure that there are no problems with strict pointer aliasing. nuclear@3: union C2T_union { C c; T t; }; nuclear@3: nuclear@3: public: nuclear@3: // General purpose implementation for standard syncs. nuclear@3: inline static C Exchange_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; } nuclear@3: inline static C Exchange_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; } nuclear@3: inline static C Exchange_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; } nuclear@3: inline static C Exchange_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; } nuclear@3: inline static C ExchangeAdd_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; } nuclear@3: inline static C ExchangeAdd_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; } nuclear@3: inline static C ExchangeAdd_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; } nuclear@3: inline static C ExchangeAdd_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; } nuclear@3: inline static bool CompareAndSet_Sync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); } nuclear@3: inline static bool CompareAndSet_Release(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); } nuclear@3: inline static bool CompareAndSet_Relse(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); } nuclear@3: inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); } nuclear@3: // Loads and stores with memory fence. These have only the relevant versions. nuclear@3: inline static void Store_Release(volatile C* p, C val) { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); } nuclear@3: inline static C Load_Acquire(const volatile C* p) { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: nuclear@3: // Atomic value base class - implements operations shared for integers and pointers. nuclear@3: template nuclear@3: class AtomicValueBase nuclear@3: { nuclear@3: protected: nuclear@3: typedef AtomicOps Ops; nuclear@3: public: nuclear@3: nuclear@3: volatile T Value; nuclear@3: nuclear@3: inline AtomicValueBase() { } nuclear@3: explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); } nuclear@3: nuclear@3: // Most libraries (TBB and Joshua Scholar's) library do not do Load_Acquire nuclear@3: // here, since most algorithms do not require atomic loads. Needs some research. nuclear@3: inline operator T() const { return Value; } nuclear@3: nuclear@3: // *** Standard Atomic inlines nuclear@3: inline T Exchange_Sync(T val) { return Ops::Exchange_Sync(&Value, val); } nuclear@3: inline T Exchange_Release(T val) { return Ops::Exchange_Release(&Value, val); } nuclear@3: inline T Exchange_Acquire(T val) { return Ops::Exchange_Acquire(&Value, val); } nuclear@3: inline T Exchange_NoSync(T val) { return Ops::Exchange_NoSync(&Value, val); } nuclear@3: inline bool CompareAndSet_Sync(T c, T val) { return Ops::CompareAndSet_Sync(&Value, c, val); } nuclear@3: inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); } nuclear@3: inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Relse(&Value, c, val); } nuclear@3: inline bool CompareAndSet_NoSync(T c, T val) { return Ops::CompareAndSet_NoSync(&Value, c, val); } nuclear@3: // Load & Store. nuclear@3: inline void Store_Release(T val) { Ops::Store_Release(&Value, val); } nuclear@3: inline T Load_Acquire() const { return Ops::Load_Acquire(&Value); } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // ***** AtomicPtr - Atomic pointer template nuclear@3: nuclear@3: // This pointer class supports atomic assignments with release, nuclear@3: // increment / decrement operations, and conditional compare + set. nuclear@3: nuclear@3: template nuclear@3: class AtomicPtr : public AtomicValueBase nuclear@3: { nuclear@3: typedef typename AtomicValueBase::Ops Ops; nuclear@3: nuclear@3: public: nuclear@3: // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor. nuclear@3: inline AtomicPtr() : AtomicValueBase() { this->Value = 0; } nuclear@3: explicit inline AtomicPtr(T* val) : AtomicValueBase(val) { } nuclear@3: nuclear@3: // Pointer access. nuclear@3: inline T* operator -> () const { return this->Load_Acquire(); } nuclear@3: nuclear@3: // It looks like it is convenient to have Load_Acquire characteristics nuclear@3: // for this, since that is convenient for algorithms such as linked nuclear@3: // list traversals that can be added to bu another thread. nuclear@3: inline operator T* () const { return this->Load_Acquire(); } nuclear@3: nuclear@3: nuclear@3: // *** Standard Atomic inlines (applicable to pointers) nuclear@3: nuclear@3: // ExhangeAdd considers pointer size for pointers. nuclear@3: template nuclear@3: inline T* ExchangeAdd_Sync(I incr) { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); } nuclear@3: template nuclear@3: inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); } nuclear@3: template nuclear@3: inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); } nuclear@3: template nuclear@3: inline T* ExchangeAdd_NoSync(I incr) { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); } nuclear@3: nuclear@3: // *** Atomic Operators nuclear@3: nuclear@3: inline T* operator = (T* val) { this->Store_Release(val); return val; } nuclear@3: nuclear@3: template nuclear@3: inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; } nuclear@3: template nuclear@3: inline T* operator -= (I val) { return operator += (-val); } nuclear@3: nuclear@3: inline T* operator ++ () { return ExchangeAdd_Sync(1) + 1; } nuclear@3: inline T* operator -- () { return ExchangeAdd_Sync(-1) - 1; } nuclear@3: inline T* operator ++ (int) { return ExchangeAdd_Sync(1); } nuclear@3: inline T* operator -- (int) { return ExchangeAdd_Sync(-1); } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: // ***** AtomicInt - Atomic integer template nuclear@3: nuclear@3: // Implements an atomic integer type; the exact type to use is provided nuclear@3: // as an argument. Supports atomic Acquire / Release semantics, atomic nuclear@3: // arithmetic operations, and atomic conditional compare + set. nuclear@3: nuclear@3: template nuclear@3: class AtomicInt : public AtomicValueBase nuclear@3: { nuclear@3: typedef typename AtomicValueBase::Ops Ops; nuclear@3: nuclear@3: public: nuclear@3: inline AtomicInt() : AtomicValueBase() { } nuclear@3: explicit inline AtomicInt(T val) : AtomicValueBase(val) { } nuclear@3: nuclear@3: nuclear@3: // *** Standard Atomic inlines (applicable to int) nuclear@3: inline T ExchangeAdd_Sync(T val) { return Ops::ExchangeAdd_Sync(&this->Value, val); } nuclear@3: inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); } nuclear@3: inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); } nuclear@3: inline T ExchangeAdd_NoSync(T val) { return Ops::ExchangeAdd_NoSync(&this->Value, val); } nuclear@3: // These increments could be more efficient because they don't return a value. nuclear@3: inline void Increment_Sync() { ExchangeAdd_Sync((T)1); } nuclear@3: inline void Increment_Release() { ExchangeAdd_Release((T)1); } nuclear@3: inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); } nuclear@3: inline void Increment_NoSync() { ExchangeAdd_NoSync((T)1); } nuclear@3: nuclear@3: // *** Atomic Operators nuclear@3: nuclear@3: inline T operator = (T val) { this->Store_Release(val); return val; } nuclear@3: inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; } nuclear@3: inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; } nuclear@3: nuclear@3: inline T operator ++ () { return ExchangeAdd_Sync((T)1) + 1; } nuclear@3: inline T operator -- () { return ExchangeAdd_Sync(((T)0)-1) - 1; } nuclear@3: inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); } nuclear@3: inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); } nuclear@3: nuclear@3: // More complex atomic operations. Leave it to compiler whether to optimize them or not. nuclear@3: T operator &= (T arg) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp & arg; nuclear@3: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator |= (T arg) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp | arg; nuclear@3: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator ^= (T arg) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp ^ arg; nuclear@3: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator *= (T arg) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp * arg; nuclear@3: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator /= (T arg) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp / arg; nuclear@3: } while(!CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator >>= (unsigned bits) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp >> bits; nuclear@3: } while(!CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: nuclear@3: T operator <<= (unsigned bits) nuclear@3: { nuclear@3: T comp, newVal; nuclear@3: do { nuclear@3: comp = this->Value; nuclear@3: newVal = comp << bits; nuclear@3: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@3: return newVal; nuclear@3: } nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: nuclear@3: //----------------------------------------------------------------------------------- nuclear@3: // ***** Lock nuclear@3: nuclear@3: // Lock is a simplest and most efficient mutual-exclusion lock class. nuclear@3: // Unlike Mutex, it cannot be waited on. nuclear@3: nuclear@3: class Lock nuclear@3: { nuclear@3: // NOTE: Locks are not allocatable and they themselves should not allocate nuclear@3: // memory by standard means. This is the case because StandardAllocator nuclear@3: // relies on this class. nuclear@3: // Make 'delete' private. Don't do this for 'new' since it can be redefined. nuclear@3: void operator delete(void*) {} nuclear@3: nuclear@3: nuclear@3: // *** Lock implementation for various platforms. nuclear@3: nuclear@3: #if !defined(OVR_ENABLE_THREADS) nuclear@3: nuclear@3: public: nuclear@3: // With no thread support, lock does nothing. nuclear@3: inline Lock() { } nuclear@3: inline Lock(unsigned) { } nuclear@3: inline ~Lock() { } nuclear@3: inline void DoLock() { } nuclear@3: inline void Unlock() { } nuclear@3: nuclear@3: // Windows. nuclear@3: #elif defined(OVR_OS_WIN32) nuclear@3: nuclear@3: CRITICAL_SECTION cs; nuclear@3: public: nuclear@3: Lock(unsigned spinCount = 0); nuclear@3: ~Lock(); nuclear@3: // Locking functions. nuclear@3: inline void DoLock() { ::EnterCriticalSection(&cs); } nuclear@3: inline void Unlock() { ::LeaveCriticalSection(&cs); } nuclear@3: nuclear@3: #else nuclear@3: pthread_mutex_t mutex; nuclear@3: nuclear@3: public: nuclear@3: static pthread_mutexattr_t RecursiveAttr; nuclear@3: static bool RecursiveAttrInit; nuclear@3: nuclear@3: Lock (unsigned dummy = 0) nuclear@3: { nuclear@3: if (!RecursiveAttrInit) nuclear@3: { nuclear@3: pthread_mutexattr_init(&RecursiveAttr); nuclear@3: pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE); nuclear@3: RecursiveAttrInit = 1; nuclear@3: } nuclear@3: pthread_mutex_init(&mutex,&RecursiveAttr); nuclear@3: } nuclear@3: ~Lock () { pthread_mutex_destroy(&mutex); } nuclear@3: inline void DoLock() { pthread_mutex_lock(&mutex); } nuclear@3: inline void Unlock() { pthread_mutex_unlock(&mutex); } nuclear@3: nuclear@3: #endif // OVR_ENABLE_THREDS nuclear@3: nuclear@3: nuclear@3: public: nuclear@3: // Locker class, used for automatic locking nuclear@3: class Locker nuclear@3: { nuclear@3: public: nuclear@3: Lock *pLock; nuclear@3: inline Locker(Lock *plock) nuclear@3: { pLock = plock; pLock->DoLock(); } nuclear@3: inline ~Locker() nuclear@3: { pLock->Unlock(); } nuclear@3: }; nuclear@3: }; nuclear@3: nuclear@3: nuclear@3: nuclear@3: } // OVR nuclear@3: nuclear@3: #endif