nuclear@1: /************************************************************************************ nuclear@1: nuclear@1: PublicHeader: OVR.h nuclear@1: Filename : OVR_Atomic.h nuclear@1: Content : Contains atomic operations and inline fastest locking nuclear@1: functionality. Will contain #ifdefs for OS efficiency. nuclear@1: Have non-thread-safe implementaion if not available. nuclear@1: Created : September 19, 2012 nuclear@1: Notes : nuclear@1: nuclear@1: Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved. nuclear@1: nuclear@1: Use of this software is subject to the terms of the Oculus license nuclear@1: agreement provided at the time of installation or download, or which nuclear@1: otherwise accompanies this software in either electronic or hard copy form. nuclear@1: nuclear@1: ************************************************************************************/ nuclear@1: #ifndef OVR_Atomic_h nuclear@1: #define OVR_Atomic_h nuclear@1: nuclear@1: #include "OVR_Types.h" nuclear@1: nuclear@1: // Include System thread functionality. nuclear@1: #if defined(OVR_OS_WIN32) nuclear@1: #include nuclear@1: #else nuclear@1: #include nuclear@1: #endif nuclear@1: nuclear@1: nuclear@1: namespace OVR { nuclear@1: nuclear@1: nuclear@1: // ****** Declared classes nuclear@1: nuclear@1: // If there is NO thread support we implement AtomicOps and nuclear@1: // Lock objects as no-ops. The other classes are not defined. nuclear@1: template class AtomicOps; nuclear@1: template class AtomicInt; nuclear@1: template class AtomicPtr; nuclear@1: nuclear@1: class Lock; nuclear@1: nuclear@1: nuclear@1: //----------------------------------------------------------------------------------- nuclear@1: // ***** AtomicOps nuclear@1: nuclear@1: // Atomic operations are provided by the AtomicOps templates class, nuclear@1: // implemented through system-specific AtomicOpsRaw specializations. 
nuclear@1: // It provides several fundamental operations such as Exchange, ExchangeAdd nuclear@1: // CompareAndSet, and Store_Release. Each function includes several memory nuclear@1: // synchronization versions, important for multiprocessing CPUs with weak nuclear@1: // memory consistency. The following memory fencing strategies are supported: nuclear@1: // nuclear@1: // - NoSync. No memory synchronization is done for atomic op. nuclear@1: // - Release. All other memory writes are completed before atomic op nuclear@1: // writes its results. nuclear@1: // - Acquire. Further memory reads are forced to wait until atomic op nuclear@1: // executes, guaranteeing that the right values will be seen. nuclear@1: // - Sync. A combination of Release and Acquire. nuclear@1: nuclear@1: nuclear@1: // *** AtomicOpsRaw nuclear@1: nuclear@1: // AtomicOpsRaw is a specialized template that provides atomic operations nuclear@1: // used by AtomicOps. This class has two fundamental qualities: (1) it nuclear@1: // defines a type T of correct size, and (2) provides operations that work nuclear@1: // atomically, such as Exchange_Sync and CompareAndSet_Release. nuclear@1: nuclear@1: // AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw. nuclear@1: // The primary thing is does is define sync class objects, whose destructor and nuclear@1: // constructor provide places to insert appropriate synchronization calls, on nuclear@1: // systems where such calls are necessary. So far, the breakdown is as follows: nuclear@1: // nuclear@1: // - X86 systems don't need custom syncs, since their exchange/atomic nuclear@1: // instructions are implicitly synchronized. nuclear@1: // - PowerPC requires lwsync/isync instructions that can use this mechanism. 
nuclear@1: // - If some other systems require a mechanism where syncing type is associated nuclear@1: // with a particular instruction, the default implementation (which implements nuclear@1: // all Sync, Acquire, and Release modes in terms of NoSync and fence) may not nuclear@1: // work. Ii that case it will need to be #ifdef-ed conditionally. nuclear@1: nuclear@1: struct AtomicOpsRawBase nuclear@1: { nuclear@1: #if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_OS_WIN32) || defined(OVR_OS_IPHONE) nuclear@1: // Need to have empty constructor to avoid class 'unused' variable warning. nuclear@1: struct FullSync { inline FullSync() { } }; nuclear@1: struct AcquireSync { inline AcquireSync() { } }; nuclear@1: struct ReleaseSync { inline ReleaseSync() { } }; nuclear@1: nuclear@1: #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) nuclear@1: struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } }; nuclear@1: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } }; nuclear@1: struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; nuclear@1: nuclear@1: #elif defined(OVR_CPU_MIPS) nuclear@1: struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } }; nuclear@1: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } }; nuclear@1: struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } }; nuclear@1: nuclear@1: #elif defined(OVR_CPU_ARM) nuclear@1: struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } }; nuclear@1: struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } }; nuclear@1: struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } }; nuclear@1: nuclear@1: nuclear@1: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4) nuclear@1: // __sync functions are already full 
sync nuclear@1: struct FullSync { inline FullSync() { } }; nuclear@1: struct AcquireSync { inline AcquireSync() { } }; nuclear@1: struct ReleaseSync { inline ReleaseSync() { } }; nuclear@1: #endif nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // 4-Byte raw data atomic op implementation class. nuclear@1: struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase nuclear@1: { nuclear@1: #if !defined(OVR_ENABLE_THREADS) nuclear@1: nuclear@1: // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: // *** Thread - Safe Atomic Versions. nuclear@1: nuclear@1: #elif defined(OVR_OS_WIN32) nuclear@1: nuclear@1: // Use special defined for VC6, where volatile is not used and nuclear@1: // InterlockedCompareExchange is declared incorrectly. nuclear@1: typedef LONG T; nuclear@1: #if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300) nuclear@1: typedef T* InterlockTPtr; nuclear@1: typedef LPVOID ET; nuclear@1: typedef ET* InterlockETPtr; nuclear@1: #else nuclear@1: typedef volatile T* InterlockTPtr; nuclear@1: typedef T ET; nuclear@1: typedef InterlockTPtr InterlockETPtr; nuclear@1: #endif nuclear@1: inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange((InterlockTPtr)p, val); } nuclear@1: inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd((InterlockTPtr)p, val); } nuclear@1: inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; } nuclear@1: nuclear@1: #elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC) nuclear@1: typedef UInt32 T; nuclear@1: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "lwarx %[r],0,%[i]\n\t" nuclear@1: "stwcx. 
%[j],0,%[i]\n\t" nuclear@1: "bne- 1b\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 dummy, ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "lwarx %[r],0,%[i]\n\t" nuclear@1: "add %[o],%[r],%[j]\n\t" nuclear@1: "stwcx. %[o],0,%[i]\n\t" nuclear@1: "bne- 1b\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@1: { nuclear@1: UInt32 ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "lwarx %[r],0,%[i]\n\t" nuclear@1: "cmpw 0,%[r],%[cmp]\n\t" nuclear@1: "mfcr %[r]\n\t" nuclear@1: "bne- 2f\n\t" nuclear@1: "stwcx. %[val],0,%[i]\n\t" nuclear@1: "bne- 1b\n\t" nuclear@1: "2:\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory"); nuclear@1: nuclear@1: return (ret & 0x20000000) ? 
1 : 0; nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CPU_MIPS) nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ll %[r],0(%[i])\n\t" nuclear@1: "sc %[j],0(%[i])\n\t" nuclear@1: "beq %[j],$0,1b\n\t" nuclear@1: "nop \n" nuclear@1: : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ll %[r],0(%[i])\n\t" nuclear@1: "addu %[j],%[r],%[j]\n\t" nuclear@1: "sc %[j],0(%[i])\n\t" nuclear@1: "beq %[j],$0,1b\n\t" nuclear@1: "nop \n" nuclear@1: : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@1: { nuclear@1: UInt32 ret, dummy; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "move %[r],$0\n\t" nuclear@1: "ll %[o],0(%[i])\n\t" nuclear@1: "bne %[o],%[c],2f\n\t" nuclear@1: "move %[r],%[v]\n\t" nuclear@1: "sc %[r],0(%[i])\n\t" nuclear@1: "beq %[r],$0,1b\n\t" nuclear@1: "nop \n\t" nuclear@1: "2:\n" nuclear@1: : "+m" (*i),[r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value) nuclear@1: : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM) nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: for(;;) nuclear@1: { nuclear@1: T r = __ldrex(i); nuclear@1: if (__strex(j, i) == 0) nuclear@1: return r; nuclear@1: } nuclear@1: } nuclear@1: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 
*i, UInt32 j) nuclear@1: { nuclear@1: for(;;) nuclear@1: { nuclear@1: T r = __ldrex(i); nuclear@1: if (__strex(r + j, i) == 0) nuclear@1: return r; nuclear@1: } nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@1: { nuclear@1: for(;;) nuclear@1: { nuclear@1: T r = __ldrex(i); nuclear@1: if (r != c) nuclear@1: return 0; nuclear@1: if (__strex(value, i) == 0) nuclear@1: return 1; nuclear@1: } nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CPU_ARM) nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 ret, dummy; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldrex %[r],[%[i]]\n\t" nuclear@1: "strex %[t],%[j],[%[i]]\n\t" nuclear@1: "cmp %[t],#0\n\t" nuclear@1: "bne 1b\n\t" nuclear@1: : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: UInt32 ret, dummy, test; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldrex %[r],[%[i]]\n\t" nuclear@1: "add %[o],%[r],%[j]\n\t" nuclear@1: "strex %[t],%[o],[%[i]]\n\t" nuclear@1: "cmp %[t],#0\n\t" nuclear@1: "bne 1b\n\t" nuclear@1: : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@1: { nuclear@1: UInt32 ret = 1, dummy, test; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldrex %[o],[%[i]]\n\t" nuclear@1: "cmp %[o],%[c]\n\t" nuclear@1: "bne 2f\n\t" nuclear@1: "strex %[r],%[v],[%[i]]\n\t" nuclear@1: "cmp %[r],#0\n\t" nuclear@1: "bne 1b\n\t" nuclear@1: "2:\n" nuclear@1: : "+m" (*i),[r] "=&r" (ret), [o] "=&r" (dummy), 
[t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value) nuclear@1: : "cc", "memory"); nuclear@1: nuclear@1: return !ret; nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CPU_X86) nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: asm volatile("xchgl %1,%[i]\n" nuclear@1: : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory"); nuclear@1: nuclear@1: return j; nuclear@1: } nuclear@1: nuclear@1: static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j) nuclear@1: { nuclear@1: asm volatile("lock; xaddl %1,%[i]\n" nuclear@1: : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory"); nuclear@1: nuclear@1: return j; nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value) nuclear@1: { nuclear@1: UInt32 ret; nuclear@1: nuclear@1: asm volatile("lock; cmpxchgl %[v],%[i]\n" nuclear@1: : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory"); nuclear@1: nuclear@1: return (ret == c); nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) nuclear@1: nuclear@1: typedef UInt32 T; nuclear@1: nuclear@1: static inline T Exchange_NoSync(volatile T *i, T j) nuclear@1: { nuclear@1: T v; nuclear@1: do { nuclear@1: v = *i; nuclear@1: } while (!__sync_bool_compare_and_swap(i, v, j)); nuclear@1: return v; nuclear@1: } nuclear@1: nuclear@1: static inline T ExchangeAdd_NoSync(volatile T *i, T j) nuclear@1: { nuclear@1: return __sync_fetch_and_add(i, j); nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) nuclear@1: { nuclear@1: return __sync_bool_compare_and_swap(i, c, value); nuclear@1: } nuclear@1: nuclear@1: #endif // OS nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // 8-Byte raw data data atomic op implementation class. 
nuclear@1: // Currently implementation is provided only on systems with 64-bit pointers. nuclear@1: struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase nuclear@1: { nuclear@1: #if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS) nuclear@1: nuclear@1: // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl. nuclear@1: typedef UInt64 T; nuclear@1: nuclear@1: // *** Thread - Safe OS specific versions. nuclear@1: #elif defined(OVR_OS_WIN32) nuclear@1: nuclear@1: // This is only for 64-bit systems. nuclear@1: typedef LONG64 T; nuclear@1: typedef volatile T* InterlockTPtr; nuclear@1: inline static T Exchange_NoSync(volatile T* p, T val) { return InterlockedExchange64((InterlockTPtr)p, val); } nuclear@1: inline static T ExchangeAdd_NoSync(volatile T* p, T val) { return InterlockedExchangeAdd64((InterlockTPtr)p, val); } nuclear@1: inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; } nuclear@1: nuclear@1: #elif defined(OVR_CPU_PPC64) nuclear@1: nuclear@1: typedef UInt64 T; nuclear@1: nuclear@1: static inline UInt64 Exchange_NoSync(volatile UInt64 *i, UInt64 j) nuclear@1: { nuclear@1: UInt64 dummy, ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldarx %[r],0,%[i]\n\t" nuclear@1: "mr %[o],%[j]\n\t" nuclear@1: "stdcx. %[o],0,%[i]\n\t" nuclear@1: "bne- 1b\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline UInt64 ExchangeAdd_NoSync(volatile UInt64 *i, UInt64 j) nuclear@1: { nuclear@1: UInt64 dummy, ret; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldarx %[r],0,%[i]\n\t" nuclear@1: "add %[o],%[r],%[j]\n\t" nuclear@1: "stdcx. 
%[o],0,%[i]\n\t" nuclear@1: "bne- 1b\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc"); nuclear@1: nuclear@1: return ret; nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile UInt64 *i, UInt64 c, UInt64 value) nuclear@1: { nuclear@1: UInt64 ret, dummy; nuclear@1: nuclear@1: asm volatile("1:\n\t" nuclear@1: "ldarx %[r],0,%[i]\n\t" nuclear@1: "cmpw 0,%[r],%[cmp]\n\t" nuclear@1: "mfcr %[r]\n\t" nuclear@1: "bne- 2f\n\t" nuclear@1: "stdcx. %[val],0,%[i]\n\t" nuclear@1: "bne- 1b\n\t" nuclear@1: "2:\n" nuclear@1: : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc"); nuclear@1: nuclear@1: return (ret & 0x20000000) ? 1 : 0; nuclear@1: } nuclear@1: nuclear@1: #elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1) nuclear@1: nuclear@1: typedef UInt64 T; nuclear@1: nuclear@1: static inline T Exchange_NoSync(volatile T *i, T j) nuclear@1: { nuclear@1: T v; nuclear@1: do { nuclear@1: v = *i; nuclear@1: } while (!__sync_bool_compare_and_swap(i, v, j)); nuclear@1: return v; nuclear@1: } nuclear@1: nuclear@1: static inline T ExchangeAdd_NoSync(volatile T *i, T j) nuclear@1: { nuclear@1: return __sync_fetch_and_add(i, j); nuclear@1: } nuclear@1: nuclear@1: static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value) nuclear@1: { nuclear@1: return __sync_bool_compare_and_swap(i, c, value); nuclear@1: } nuclear@1: nuclear@1: #endif // OS nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // Default implementation for AtomicOpsRaw; provides implementation of mem-fenced nuclear@1: // atomic operations where fencing is done with a sync object wrapped around a NoSync nuclear@1: // operation implemented in the base class. If such implementation is not possible nuclear@1: // on a given platform, #ifdefs can be used to disable it and then op functions can be nuclear@1: // implemented individually in the appropriate AtomicOpsRaw class. 
nuclear@1: nuclear@1: template nuclear@1: struct AtomicOpsRaw_DefImpl : public O nuclear@1: { nuclear@1: typedef typename O::T O_T; nuclear@1: typedef typename O::FullSync O_FullSync; nuclear@1: typedef typename O::AcquireSync O_AcquireSync; nuclear@1: typedef typename O::ReleaseSync O_ReleaseSync; nuclear@1: nuclear@1: // If there is no thread support, provide the default implementation. In this case, nuclear@1: // the base class (0) must still provide the T declaration. nuclear@1: #ifndef OVR_ENABLE_THREADS nuclear@1: nuclear@1: // Atomic exchange of val with argument. Returns old val. nuclear@1: inline static O_T Exchange_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p = val; return old; } nuclear@1: // Adds a new val to argument; returns its old val. nuclear@1: inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; } nuclear@1: // Compares the argument data with 'c' val. nuclear@1: // If succeeded, stores val int '*p' and returns true; otherwise returns false. nuclear@1: inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p==c) { *p = val; return 1; } return 0; } nuclear@1: nuclear@1: #endif nuclear@1: nuclear@1: // If NoSync wrapped implementation may not be possible, it this block should be nuclear@1: // replaced with per-function implementation in O. nuclear@1: // "AtomicOpsRaw_DefImpl::" prefix in calls below. 
nuclear@1: inline static O_T Exchange_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@1: inline static O_T Exchange_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@1: inline static O_T Exchange_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::Exchange_NoSync(p, val); } nuclear@1: inline static O_T ExchangeAdd_Sync(volatile O_T* p, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@1: inline static O_T ExchangeAdd_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@1: inline static O_T ExchangeAdd_Acquire(volatile O_T* p, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::ExchangeAdd_NoSync(p, val); } nuclear@1: inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val) { O_FullSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@1: inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@1: inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl::CompareAndSet_NoSync(p,c,val); } nuclear@1: nuclear@1: // Loads and stores with memory fence. These have only the relevant versions. nuclear@1: #ifdef OVR_CPU_X86 nuclear@1: // On X86, Store_Release is implemented as exchange. Note that we can also nuclear@1: // consider 'sfence' in the future, although it is not as compatible with older CPUs. 
nuclear@1: inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); } nuclear@1: #else nuclear@1: inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; } nuclear@1: #endif nuclear@1: inline static O_T Load_Acquire(const volatile O_T* p) { O_AcquireSync sync; OVR_UNUSED(sync); return *p; } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: template nuclear@1: struct AtomicOpsRaw : public AtomicOpsRawBase { }; nuclear@1: nuclear@1: template<> nuclear@1: struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl nuclear@1: { nuclear@1: // Ensure that assigned type size is correct. nuclear@1: AtomicOpsRaw() nuclear@1: { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl::T) == 4); } nuclear@1: }; nuclear@1: template<> nuclear@1: struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl nuclear@1: { nuclear@1: AtomicOpsRaw() nuclear@1: { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl::T) == 8); } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // *** AtomicOps - implementation of atomic Ops for specified class nuclear@1: nuclear@1: // Implements atomic ops on a class, provided that the object is either nuclear@1: // 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations nuclear@1: // available). Relies on AtomicOpsRaw for much of implementation. nuclear@1: nuclear@1: template nuclear@1: class AtomicOps nuclear@1: { nuclear@1: typedef AtomicOpsRaw Ops; nuclear@1: typedef typename Ops::T T; nuclear@1: typedef volatile typename Ops::T* PT; nuclear@1: // We cast through unions to (1) avoid pointer size compiler warnings nuclear@1: // and (2) ensure that there are no problems with strict pointer aliasing. nuclear@1: union C2T_union { C c; T t; }; nuclear@1: nuclear@1: public: nuclear@1: // General purpose implementation for standard syncs. 
nuclear@1: inline static C Exchange_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; } nuclear@1: inline static C Exchange_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; } nuclear@1: inline static C Exchange_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; } nuclear@1: inline static C Exchange_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; } nuclear@1: inline static C ExchangeAdd_Sync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; } nuclear@1: inline static C ExchangeAdd_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; } nuclear@1: inline static C ExchangeAdd_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; } nuclear@1: inline static C ExchangeAdd_NoSync(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; } nuclear@1: inline static bool CompareAndSet_Sync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); } nuclear@1: inline static bool CompareAndSet_Release(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); } nuclear@1: inline static bool CompareAndSet_Relse(volatile C* p, C c, C val){ C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); } nuclear@1: inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val) { C2T_union u,cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); } nuclear@1: // Loads and stores with memory fence. These have only the relevant versions. 
nuclear@1: inline static void Store_Release(volatile C* p, C val) { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); } nuclear@1: inline static C Load_Acquire(const volatile C* p) { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: nuclear@1: // Atomic value base class - implements operations shared for integers and pointers. nuclear@1: template nuclear@1: class AtomicValueBase nuclear@1: { nuclear@1: protected: nuclear@1: typedef AtomicOps Ops; nuclear@1: public: nuclear@1: nuclear@1: volatile T Value; nuclear@1: nuclear@1: inline AtomicValueBase() { } nuclear@1: explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); } nuclear@1: nuclear@1: // Most libraries (TBB and Joshua Scholar's) library do not do Load_Acquire nuclear@1: // here, since most algorithms do not require atomic loads. Needs some research. nuclear@1: inline operator T() const { return Value; } nuclear@1: nuclear@1: // *** Standard Atomic inlines nuclear@1: inline T Exchange_Sync(T val) { return Ops::Exchange_Sync(&Value, val); } nuclear@1: inline T Exchange_Release(T val) { return Ops::Exchange_Release(&Value, val); } nuclear@1: inline T Exchange_Acquire(T val) { return Ops::Exchange_Acquire(&Value, val); } nuclear@1: inline T Exchange_NoSync(T val) { return Ops::Exchange_NoSync(&Value, val); } nuclear@1: inline bool CompareAndSet_Sync(T c, T val) { return Ops::CompareAndSet_Sync(&Value, c, val); } nuclear@1: inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); } nuclear@1: inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Relse(&Value, c, val); } nuclear@1: inline bool CompareAndSet_NoSync(T c, T val) { return Ops::CompareAndSet_NoSync(&Value, c, val); } nuclear@1: // Load & Store. 
nuclear@1: inline void Store_Release(T val) { Ops::Store_Release(&Value, val); } nuclear@1: inline T Load_Acquire() const { return Ops::Load_Acquire(&Value); } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // ***** AtomicPtr - Atomic pointer template nuclear@1: nuclear@1: // This pointer class supports atomic assignments with release, nuclear@1: // increment / decrement operations, and conditional compare + set. nuclear@1: nuclear@1: template nuclear@1: class AtomicPtr : public AtomicValueBase nuclear@1: { nuclear@1: typedef typename AtomicValueBase::Ops Ops; nuclear@1: nuclear@1: public: nuclear@1: // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor. nuclear@1: inline AtomicPtr() : AtomicValueBase() { this->Value = 0; } nuclear@1: explicit inline AtomicPtr(T* val) : AtomicValueBase(val) { } nuclear@1: nuclear@1: // Pointer access. nuclear@1: inline T* operator -> () const { return this->Load_Acquire(); } nuclear@1: nuclear@1: // It looks like it is convenient to have Load_Acquire characteristics nuclear@1: // for this, since that is convenient for algorithms such as linked nuclear@1: // list traversals that can be added to bu another thread. nuclear@1: inline operator T* () const { return this->Load_Acquire(); } nuclear@1: nuclear@1: nuclear@1: // *** Standard Atomic inlines (applicable to pointers) nuclear@1: nuclear@1: // ExhangeAdd considers pointer size for pointers. 
nuclear@1: template nuclear@1: inline T* ExchangeAdd_Sync(I incr) { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); } nuclear@1: template nuclear@1: inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); } nuclear@1: template nuclear@1: inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); } nuclear@1: template nuclear@1: inline T* ExchangeAdd_NoSync(I incr) { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); } nuclear@1: nuclear@1: // *** Atomic Operators nuclear@1: nuclear@1: inline T* operator = (T* val) { this->Store_Release(val); return val; } nuclear@1: nuclear@1: template nuclear@1: inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; } nuclear@1: template nuclear@1: inline T* operator -= (I val) { return operator += (-val); } nuclear@1: nuclear@1: inline T* operator ++ () { return ExchangeAdd_Sync(1) + 1; } nuclear@1: inline T* operator -- () { return ExchangeAdd_Sync(-1) - 1; } nuclear@1: inline T* operator ++ (int) { return ExchangeAdd_Sync(1); } nuclear@1: inline T* operator -- (int) { return ExchangeAdd_Sync(-1); } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: // ***** AtomicInt - Atomic integer template nuclear@1: nuclear@1: // Implements an atomic integer type; the exact type to use is provided nuclear@1: // as an argument. Supports atomic Acquire / Release semantics, atomic nuclear@1: // arithmetic operations, and atomic conditional compare + set. 
nuclear@1: nuclear@1: template nuclear@1: class AtomicInt : public AtomicValueBase nuclear@1: { nuclear@1: typedef typename AtomicValueBase::Ops Ops; nuclear@1: nuclear@1: public: nuclear@1: inline AtomicInt() : AtomicValueBase() { } nuclear@1: explicit inline AtomicInt(T val) : AtomicValueBase(val) { } nuclear@1: nuclear@1: nuclear@1: // *** Standard Atomic inlines (applicable to int) nuclear@1: inline T ExchangeAdd_Sync(T val) { return Ops::ExchangeAdd_Sync(&this->Value, val); } nuclear@1: inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); } nuclear@1: inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); } nuclear@1: inline T ExchangeAdd_NoSync(T val) { return Ops::ExchangeAdd_NoSync(&this->Value, val); } nuclear@1: // These increments could be more efficient because they don't return a value. nuclear@1: inline void Increment_Sync() { ExchangeAdd_Sync((T)1); } nuclear@1: inline void Increment_Release() { ExchangeAdd_Release((T)1); } nuclear@1: inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); } nuclear@1: inline void Increment_NoSync() { ExchangeAdd_NoSync((T)1); } nuclear@1: nuclear@1: // *** Atomic Operators nuclear@1: nuclear@1: inline T operator = (T val) { this->Store_Release(val); return val; } nuclear@1: inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; } nuclear@1: inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; } nuclear@1: nuclear@1: inline T operator ++ () { return ExchangeAdd_Sync((T)1) + 1; } nuclear@1: inline T operator -- () { return ExchangeAdd_Sync(((T)0)-1) - 1; } nuclear@1: inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); } nuclear@1: inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); } nuclear@1: nuclear@1: // More complex atomic operations. Leave it to compiler whether to optimize them or not. 
nuclear@1: T operator &= (T arg) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp & arg; nuclear@1: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator |= (T arg) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp | arg; nuclear@1: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator ^= (T arg) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp ^ arg; nuclear@1: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator *= (T arg) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp * arg; nuclear@1: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator /= (T arg) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp / arg; nuclear@1: } while(!CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator >>= (unsigned bits) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp >> bits; nuclear@1: } while(!CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: nuclear@1: T operator <<= (unsigned bits) nuclear@1: { nuclear@1: T comp, newVal; nuclear@1: do { nuclear@1: comp = this->Value; nuclear@1: newVal = comp << bits; nuclear@1: } while(!this->CompareAndSet_Sync(comp, newVal)); nuclear@1: return newVal; nuclear@1: } nuclear@1: }; nuclear@1: nuclear@1: nuclear@1: nuclear@1: //----------------------------------------------------------------------------------- nuclear@1: // 
// ***** Lock

// Lock is a simplest and most efficient mutual-exclusion lock class.
// Unlike Mutex, it cannot be waited on.

class Lock
{
    // NOTE: Locks are not allocatable and they themselves should not allocate
    // memory by standard means. This is the case because StandardAllocator
    // relies on this class.
    // Make 'delete' private. Don't do this for 'new' since it can be redefined.
    void operator delete(void*) {}


    // *** Lock implementation for various platforms.

#if !defined(OVR_ENABLE_THREADS)

public:
    // With no thread support, lock does nothing.
    inline Lock() { }
    inline Lock(unsigned) { }
    inline ~Lock() { }
    inline void DoLock() { }
    inline void Unlock() { }

// Windows.
#elif defined(OVR_OS_WIN32)

    CRITICAL_SECTION cs;
public:
    // Ctor/dtor bodies are defined out of line (in the corresponding .cpp);
    // spinCount is presumably forwarded to the critical section's spin count —
    // confirm against the implementation file.
    Lock(unsigned spinCount = 0);
    ~Lock();
    // Locking functions. CRITICAL_SECTION is recursive on Windows, matching
    // the recursive pthread mutex used in the #else branch below.
    inline void DoLock()    { ::EnterCriticalSection(&cs); }
    inline void Unlock()    { ::LeaveCriticalSection(&cs); }

#else
    // POSIX implementation: a recursive mutex so DoLock can nest on one thread.
    pthread_mutex_t mutex;

public:
    // Shared attribute object configuring all Lock mutexes as recursive.
    static pthread_mutexattr_t RecursiveAttr;
    // Set once the attribute above has been initialized.
    static bool               RecursiveAttrInit;

    // The 'dummy' parameter mirrors the Win32 ctor's spinCount and is ignored here.
    Lock (unsigned dummy = 0)
    {
        // NOTE(review): this lazy one-time init is itself unsynchronized; it is
        // only safe if the first Lock is constructed before any other threads
        // start — confirm with the callers / program startup order.
        if (!RecursiveAttrInit)
        {
            pthread_mutexattr_init(&RecursiveAttr);
            pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE);
            RecursiveAttrInit = 1;
        }
        pthread_mutex_init(&mutex,&RecursiveAttr);
    }
    ~Lock ()                { pthread_mutex_destroy(&mutex); }
    inline void DoLock()    { pthread_mutex_lock(&mutex); }
    inline void Unlock()    { pthread_mutex_unlock(&mutex); }

#endif // OVR_ENABLE_THREADS


public:
    // Locker class, used for automatic locking: acquires the lock on
    // construction and releases it on destruction (RAII scope guard).
    // NOTE(review): pLock is not null-checked and Locker is copyable; callers
    // are expected to pass a valid lock and keep the Locker on the stack.
    class Locker
    {
    public:
        Lock *pLock;
        inline Locker(Lock *plock)
        { pLock = plock; pLock->DoLock(); }
        inline ~Locker()
        { pLock->Unlock(); }
    };
};



} // OVR

#endif