oculus1

view libovr/Src/Kernel/OVR_Atomic.h @ 15:402cbb6d9ce3

added the shader as a separate header file
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 21 Sep 2013 04:15:20 +0300
parents e2f9e4603129
/************************************************************************************

PublicHeader: OVR.h
Filename    : OVR_Atomic.h
Content     : Contains atomic operations and inline fastest locking
              functionality. Will contain #ifdefs for OS efficiency.
              Has a non-thread-safe implementation if thread support is unavailable.
Created     : September 19, 2012
Notes       :

Copyright   : Copyright 2012 Oculus VR, Inc. All Rights reserved.

Use of this software is subject to the terms of the Oculus license
agreement provided at the time of installation or download, or which
otherwise accompanies this software in either electronic or hard copy form.

************************************************************************************/
#ifndef OVR_Atomic_h
#define OVR_Atomic_h

#include "OVR_Types.h"

// Include System thread functionality.
#if defined(OVR_OS_WIN32)
#include <windows.h>
#else
#include <pthread.h>
#endif


namespace OVR {
// ***** Declared classes

// If there is NO thread support we implement AtomicOps and
// Lock objects as no-ops. The other classes are not defined.
template<class C> class AtomicOps;
template<class T> class AtomicInt;
template<class T> class AtomicPtr;

class Lock;


//-----------------------------------------------------------------------------------
// ***** AtomicOps

// Atomic operations are provided by the AtomicOps template class,
// implemented through system-specific AtomicOpsRaw specializations.
// It provides several fundamental operations such as Exchange, ExchangeAdd,
// CompareAndSet, and Store_Release. Each function includes several memory
// synchronization versions, important for multiprocessing CPUs with weak
// memory consistency. The following memory fencing strategies are supported:
//
//  - NoSync.  No memory synchronization is done for the atomic op.
//  - Release. All other memory writes are completed before the atomic op
//             writes its results.
//  - Acquire. Further memory reads are forced to wait until the atomic op
//             executes, guaranteeing that the right values will be seen.
//  - Sync.    A combination of Release and Acquire.
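//
// For illustration only (a sketch, not part of the original header): the
// suffix selects the fence. Publishing a flag to other threads wants Release
// semantics; consuming it wants Acquire semantics:
//
//     volatile UInt32 ready = 0;
//     // Producer: prior writes complete before the flag becomes visible.
//     AtomicOps<UInt32>::Store_Release(&ready, 1);
//     // Consumer: later reads cannot be reordered before this load.
//     while (AtomicOps<UInt32>::Load_Acquire(&ready) == 0) { }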
// *** AtomicOpsRaw

// AtomicOpsRaw is a specialized template that provides atomic operations
// used by AtomicOps. This class has two fundamental qualities: (1) it
// defines a type T of the correct size, and (2) provides operations that work
// atomically, such as Exchange_Sync and CompareAndSet_Release.

// The AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw.
// The primary thing it does is define sync class objects, whose constructor and
// destructor provide places to insert appropriate synchronization calls on
// systems where such calls are necessary. So far, the breakdown is as follows:
//
//  - X86 systems don't need custom syncs, since their exchange/atomic
//    instructions are implicitly synchronized.
//  - PowerPC requires lwsync/isync instructions that can use this mechanism.
//  - If some other systems require a mechanism where the syncing type is associated
//    with a particular instruction, the default implementation (which implements
//    all Sync, Acquire, and Release modes in terms of NoSync and fence) may not
//    work. In that case it will need to be #ifdef-ed conditionally.
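//
// A minimal sketch of that pattern (illustrative, not part of the original
// header): the sync object's constructor emits the "before" fence and its
// destructor emits the "after" fence, bracketing a NoSync operation placed
// in the same scope. On PowerPC this expands roughly to: sync; <op>; isync.
//
//     { FullSync sync; OVR_UNUSED(sync); Exchange_NoSync(p, val); }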
struct AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_OS_WIN32) || defined(OVR_OS_IPHONE)
    // Need to have empty constructor to avoid class 'unused' variable warning.
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_MIPS)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_ARM)
    struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } };

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4)
    // __sync functions are already full sync
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };
#endif
};
// 4-Byte raw data atomic op implementation class.
struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef UInt32 T;

    // *** Thread-Safe Atomic Versions.

#elif defined(OVR_OS_WIN32)

    // Use special typedefs for VC6, where volatile is not used and
    // InterlockedCompareExchange is declared incorrectly.
    typedef LONG T;
#if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300)
    typedef T* InterlockTPtr;
    typedef LPVOID ET;
    typedef ET* InterlockETPtr;
#else
    typedef volatile T* InterlockTPtr;
    typedef T ET;
    typedef InterlockTPtr InterlockETPtr;
#endif
    inline static T    Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; }
#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    typedef UInt32 T;

    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "stwcx. %[j],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 dummy, ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stwcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
    {
        UInt32 ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stwcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory");

        return (ret & 0x20000000) ? 1 : 0;
    }
#elif defined(OVR_CPU_MIPS)
    typedef UInt32 T;

    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 ret;

        asm volatile("1:\n\t"
                     "ll  %[r],0(%[i])\n\t"
                     "sc  %[j],0(%[i])\n\t"
                     "beq %[j],$0,1b\n\t"
                     "nop \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 ret;

        asm volatile("1:\n\t"
                     "ll   %[r],0(%[i])\n\t"
                     "addu %[j],%[r],%[j]\n\t"
                     "sc   %[j],0(%[i])\n\t"
                     "beq  %[j],$0,1b\n\t"
                     "nop  \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
    {
        UInt32 ret, dummy;

        asm volatile("1:\n\t"
                     "move %[r],$0\n\t"
                     "ll   %[o],0(%[i])\n\t"
                     "bne  %[o],%[c],2f\n\t"
                     "move %[r],%[v]\n\t"
                     "sc   %[r],0(%[i])\n\t"
                     "beq  %[r],$0,1b\n\t"
                     "nop  \n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value)
                     : "cc", "memory");

        return ret;
    }
#elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM)
    typedef UInt32 T;

    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(j, i) == 0)
                return r;
        }
    }
    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(r + j, i) == 0)
                return r;
        }
    }

    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (r != c)
                return 0;
            if (__strex(value, i) == 0)
                return 1;
        }
    }
#elif defined(OVR_CPU_ARM)
    typedef UInt32 T;

    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 ret, dummy;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "strex %[t],%[j],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
    {
        UInt32 ret, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "add   %[o],%[r],%[j]\n\t"
                     "strex %[t],%[o],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
    {
        UInt32 ret = 1, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[o],[%[i]]\n\t"
                     "cmp   %[o],%[c]\n\t"
                     "bne   2f\n\t"
                     "strex %[r],%[v],[%[i]]\n\t"
                     "cmp   %[r],#0\n\t"
                     "bne   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value)
                     : "cc", "memory");

        return !ret;
    }
#elif defined(OVR_CPU_X86)
    typedef UInt32 T;

    static inline UInt32 Exchange_NoSync(volatile UInt32 *i, UInt32 j)
    {
        asm volatile("xchgl %1,%[i]\n"
                     : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory");

        return j;
    }

    static inline UInt32 ExchangeAdd_NoSync(volatile UInt32 *i, UInt32 j)
    {
        asm volatile("lock; xaddl %1,%[i]\n"
                     : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory");

        return j;
    }

    static inline bool CompareAndSet_NoSync(volatile UInt32 *i, UInt32 c, UInt32 value)
    {
        UInt32 ret;

        asm volatile("lock; cmpxchgl %[v],%[i]\n"
                     : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory");

        return (ret == c);
    }
#elif defined(OVR_CC_GNU) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))

    typedef UInt32 T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};
// 8-Byte raw data atomic op implementation class.
// Currently an implementation is provided only on systems with 64-bit pointers.
struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef UInt64 T;

    // *** Thread-Safe OS-specific versions.

#elif defined(OVR_OS_WIN32)

    // This is only for 64-bit systems.
    typedef LONG64 T;
    typedef volatile T* InterlockTPtr;
    inline static T    Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange64((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd64((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; }
#elif defined(OVR_CPU_PPC64)

    typedef UInt64 T;

    static inline UInt64 Exchange_NoSync(volatile UInt64 *i, UInt64 j)
    {
        UInt64 dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "mr     %[o],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline UInt64 ExchangeAdd_NoSync(volatile UInt64 *i, UInt64 j)
    {
        UInt64 dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile UInt64 *i, UInt64 c, UInt64 value)
    {
        UInt64 ret, dummy;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stdcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc");

        return (ret & 0x20000000) ? 1 : 0;
    }
#elif defined(OVR_CC_GNU) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))

    typedef UInt64 T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};
// Default implementation for AtomicOpsRaw; provides implementations of mem-fenced
// atomic operations where fencing is done with a sync object wrapped around a NoSync
// operation implemented in the base class. If such an implementation is not possible
// on a given platform, #ifdefs can be used to disable it, and the op functions can
// then be implemented individually in the appropriate AtomicOpsRaw<size> class.
template<class O>
struct AtomicOpsRaw_DefImpl : public O
{
    typedef typename O::T           O_T;
    typedef typename O::FullSync    O_FullSync;
    typedef typename O::AcquireSync O_AcquireSync;
    typedef typename O::ReleaseSync O_ReleaseSync;

    // If there is no thread support, provide the default implementation. In this case,
    // the base class (O) must still provide the T declaration.
#ifndef OVR_ENABLE_THREADS

    // Atomic exchange of val with argument. Returns old val.
    inline static O_T Exchange_NoSync(volatile O_T* p, O_T val)    { O_T old = *p; *p = val; return old; }
    // Adds a new val to argument; returns its old val.
    inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; }
    // Compares the argument data with 'c' val.
    // If it matches, stores val into '*p' and returns true; otherwise returns false.
    inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p == c) { *p = val; return 1; } return 0; }

#endif

    // If a NoSync wrapped implementation is not possible on some platform, this block
    // should be replaced with per-function implementations in O.
    // Note the "AtomicOpsRaw_DefImpl<O>::" prefix in the calls below.
    inline static O_T  Exchange_Sync(volatile O_T* p, O_T val)                { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  Exchange_Release(volatile O_T* p, O_T val)             { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  Exchange_Acquire(volatile O_T* p, O_T val)             { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Sync(volatile O_T* p, O_T val)             { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Release(volatile O_T* p, O_T val)          { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Acquire(volatile O_T* p, O_T val)          { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val)    { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }

    // Loads and stores with memory fence. These have only the relevant versions.
#ifdef OVR_CPU_X86
    // On X86, Store_Release is implemented as an exchange. Note that we can also
    // consider 'sfence' in the future, although it is not as compatible with older CPUs.
    inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); }
#else
    inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; }
#endif
    inline static O_T Load_Acquire(const volatile O_T* p)      { O_AcquireSync sync; OVR_UNUSED(sync); return *p; }
};
template<int size>
struct AtomicOpsRaw : public AtomicOpsRawBase { };

template<>
struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>
{
    // Ensure that assigned type size is correct.
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>::T) == 4); }
};
template<>
struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>
{
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>::T) == 8); }
};
// *** AtomicOps - implementation of atomic Ops for a specified class

// Implements atomic ops on a class, provided that the object is either
// 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations
// available). Relies on AtomicOpsRaw for much of the implementation.

template<class C>
class AtomicOps
{
    typedef AtomicOpsRaw<sizeof(C)>   Ops;
    typedef typename Ops::T           T;
    typedef volatile typename Ops::T* PT;
    // We cast through unions to (1) avoid pointer size compiler warnings
    // and (2) ensure that there are no problems with strict pointer aliasing.
    union C2T_union { C c; T t; };

public:
    // General purpose implementation for standard syncs.
    inline static C Exchange_Sync(volatile C* p, C val)       { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; }
    inline static C Exchange_Release(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; }
    inline static C Exchange_Acquire(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; }
    inline static C Exchange_NoSync(volatile C* p, C val)     { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Sync(volatile C* p, C val)    { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Release(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_Acquire(volatile C* p, C val) { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; }
    inline static C ExchangeAdd_NoSync(volatile C* p, C val)  { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; }
    inline static bool CompareAndSet_Sync(volatile C* p, C c, C val)    { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Release(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Acquire(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val)  { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); }
    // Loads and stores with memory fence. These have only the relevant versions.
    inline static void Store_Release(volatile C* p, C val) { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); }
    inline static C Load_Acquire(const volatile C* p)      { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; }
};
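// An illustrative sketch (not part of the original header): AtomicOps works
// on any 4- or 8-byte type; the union cast lets non-integer types ride on the
// integer primitives. 'State' here is a hypothetical enum assumed to be 4
// bytes on the target compiler:
//
//     enum State { Idle = 0, Busy = 1 };
//     volatile State s = Idle;
//     if (AtomicOps<State>::CompareAndSet_Sync(&s, Idle, Busy))
//     {
//         // ... exclusive work, then publish Idle again ...
//         AtomicOps<State>::Store_Release(&s, Idle);
//     }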
// Atomic value base class - implements operations shared by integers and pointers.
template<class T>
class AtomicValueBase
{
protected:
    typedef AtomicOps<T> Ops;
public:

    volatile T Value;

    inline AtomicValueBase()               { }
    explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); }

    // Most libraries (TBB and Joshua Scholar's) do not do a Load_Acquire
    // here, since most algorithms do not require atomic loads. Needs some research.
    inline operator T() const { return Value; }

    // *** Standard Atomic inlines
    inline T    Exchange_Sync(T val)              { return Ops::Exchange_Sync(&Value, val); }
    inline T    Exchange_Release(T val)           { return Ops::Exchange_Release(&Value, val); }
    inline T    Exchange_Acquire(T val)           { return Ops::Exchange_Acquire(&Value, val); }
    inline T    Exchange_NoSync(T val)            { return Ops::Exchange_NoSync(&Value, val); }
    inline bool CompareAndSet_Sync(T c, T val)    { return Ops::CompareAndSet_Sync(&Value, c, val); }
    inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); }
    inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Acquire(&Value, c, val); }
    inline bool CompareAndSet_NoSync(T c, T val)  { return Ops::CompareAndSet_NoSync(&Value, c, val); }
    // Load & Store.
    inline void Store_Release(T val) { Ops::Store_Release(&Value, val); }
    inline T    Load_Acquire() const { return Ops::Load_Acquire(&Value); }
};
// ***** AtomicPtr - Atomic pointer template

// This pointer class supports atomic assignments with release,
// increment / decrement operations, and conditional compare + set.

template<class T>
class AtomicPtr : public AtomicValueBase<T*>
{
    typedef typename AtomicValueBase<T*>::Ops Ops;

public:
    // Initialize pointer value to 0 by default; use Store_Release only with the explicit constructor.
    inline AtomicPtr() : AtomicValueBase<T*>()                   { this->Value = 0; }
    explicit inline AtomicPtr(T* val) : AtomicValueBase<T*>(val) { }

    // Pointer access.
    inline T* operator -> () const { return this->Load_Acquire(); }

    // It looks like it is convenient to have Load_Acquire characteristics
    // for this, since that is convenient for algorithms such as linked
    // list traversals that can be added to by another thread.
    inline operator T* () const    { return this->Load_Acquire(); }


    // *** Standard Atomic inlines (applicable to pointers)

    // ExchangeAdd considers pointer size for pointers.
    template<class I>
    inline T* ExchangeAdd_Sync(I incr)    { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_NoSync(I incr)  { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); }

    // *** Atomic Operators

    inline T* operator = (T* val) { this->Store_Release(val); return val; }

    template<class I>
    inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; }
    template<class I>
    inline T* operator -= (I val) { return operator += (-val); }

    inline T* operator ++ ()    { return ExchangeAdd_Sync(1) + 1; }
    inline T* operator -- ()    { return ExchangeAdd_Sync(-1) - 1; }
    inline T* operator ++ (int) { return ExchangeAdd_Sync(1); }
    inline T* operator -- (int) { return ExchangeAdd_Sync(-1); }
};
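// An illustrative sketch (not part of the original header; 'Node' is a
// hypothetical type): AtomicPtr supports installing a lazily-built object
// exactly once via compare + set, with acquire-ordered reads through the
// pointer conversions:
//
//     AtomicPtr<Node> Head;                 // pointer value defaults to 0
//     Node* n = new Node;
//     if (!Head.CompareAndSet_Sync(0, n))   // another thread won the race
//         delete n;
//     Node* current = Head;                 // operator T* () does Load_Acquire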
// ***** AtomicInt - Atomic integer template

// Implements an atomic integer type; the exact type to use is provided
// as an argument. Supports atomic Acquire / Release semantics, atomic
// arithmetic operations, and atomic conditional compare + set.

template<class T>
class AtomicInt : public AtomicValueBase<T>
{
    typedef typename AtomicValueBase<T>::Ops Ops;

public:
    inline AtomicInt() : AtomicValueBase<T>()                  { }
    explicit inline AtomicInt(T val) : AtomicValueBase<T>(val) { }


    // *** Standard Atomic inlines (applicable to int)
    inline T ExchangeAdd_Sync(T val)    { return Ops::ExchangeAdd_Sync(&this->Value, val); }
    inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); }
    inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); }
    inline T ExchangeAdd_NoSync(T val)  { return Ops::ExchangeAdd_NoSync(&this->Value, val); }
    // These increments could be more efficient because they don't return a value.
    inline void Increment_Sync()    { ExchangeAdd_Sync((T)1); }
    inline void Increment_Release() { ExchangeAdd_Release((T)1); }
    inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); }
    inline void Increment_NoSync()  { ExchangeAdd_NoSync((T)1); }

    // *** Atomic Operators

    inline T operator = (T val)  { this->Store_Release(val); return val; }
    inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; }
    inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; }

    inline T operator ++ ()    { return ExchangeAdd_Sync((T)1) + 1; }
    inline T operator -- ()    { return ExchangeAdd_Sync(((T)0)-1) - 1; }
    inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); }
    inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); }
    // More complex atomic operations. Leave it to the compiler whether to optimize them or not.
    T operator &= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp & arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator |= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp | arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator ^= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp ^ arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator *= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp * arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator /= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp / arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator >>= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp >> bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator <<= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp << bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }
};
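// An illustrative sketch (not part of the original header): AtomicInt as a
// shared reference count and flag word:
//
//     AtomicInt<UInt32> RefCount(1);
//     RefCount++;                           // atomic increment, Sync semantics
//     if (--RefCount == 0)
//     { /* last reference released */ }
//
//     AtomicInt<UInt32> Flags(0);
//     Flags |= 0x4;                         // retried CAS loop under the hood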
//-----------------------------------------------------------------------------------
// ***** Lock

// Lock is the simplest and most efficient mutual-exclusion lock class.
// Unlike Mutex, it cannot be waited on.

class Lock
{
    // NOTE: Locks are not allocatable and they themselves should not allocate
    // memory by standard means. This is the case because StandardAllocator
    // relies on this class.
    // Make 'delete' private. Don't do this for 'new' since it can be redefined.
    void operator delete(void*) {}


    // *** Lock implementation for various platforms.

#if !defined(OVR_ENABLE_THREADS)

public:
    // With no thread support, lock does nothing.
    inline Lock() { }
    inline Lock(unsigned) { }
    inline ~Lock() { }
    inline void DoLock() { }
    inline void Unlock() { }

    // Windows.
#elif defined(OVR_OS_WIN32)

    CRITICAL_SECTION cs;
public:
    Lock(unsigned spinCount = 0);
    ~Lock();
    // Locking functions.
    inline void DoLock() { ::EnterCriticalSection(&cs); }
    inline void Unlock() { ::LeaveCriticalSection(&cs); }

#else
    pthread_mutex_t mutex;

public:
    static pthread_mutexattr_t RecursiveAttr;
    static bool                RecursiveAttrInit;

    Lock (unsigned dummy = 0)
    {
        if (!RecursiveAttrInit)
        {
            pthread_mutexattr_init(&RecursiveAttr);
            pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE);
            RecursiveAttrInit = 1;
        }
        pthread_mutex_init(&mutex, &RecursiveAttr);
    }
    ~Lock ()             { pthread_mutex_destroy(&mutex); }
    inline void DoLock() { pthread_mutex_lock(&mutex); }
    inline void Unlock() { pthread_mutex_unlock(&mutex); }

#endif // OVR_ENABLE_THREADS


public:
    // Locker class, used for automatic locking
    class Locker
    {
    public:
        Lock *pLock;
        inline Locker(Lock *plock)
        { pLock = plock; pLock->DoLock(); }
        inline ~Locker()
        { pLock->Unlock(); }
    };
};
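// An illustrative sketch (not part of the original header; 'AddItem' and
// 'Item' are hypothetical): Lock::Locker scopes the lock so Unlock runs on
// every exit path:
//
//     Lock ListLock;
//     void AddItem(Item* item)
//     {
//         Lock::Locker guard(&ListLock);    // DoLock() here...
//         // ... mutate shared state ...
//     }                                     // ...Unlock() in ~Locker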

} // OVR

#endif