ovr_sdk: LibOVR/Src/Kernel/OVR_Atomic.h @ 0:1b39a1b46319
initial 0.4.4
author: John Tsiombikas <nuclear@member.fsf.org>
date:   Wed, 14 Jan 2015 06:51:16 +0200
/************************************************************************************

PublicHeader:   OVR_Kernel.h
Filename    :   OVR_Atomic.h
Content     :   Contains atomic operations and inline fastest locking
                functionality. Will contain #ifdefs for OS efficiency.
                Have non-thread-safe implementation if not available.
Created     :   September 19, 2012
Notes       :

Copyright   :   Copyright 2014 Oculus VR, LLC All Rights reserved.

Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License");
you may not use the Oculus VR Rift SDK except in compliance with the License,
which is provided at the time of installation or download, or which
otherwise accompanies this software in either electronic or hard copy form.

You may obtain a copy of the License at

http://www.oculusvr.com/licenses/LICENSE-3.2

Unless required by applicable law or agreed to in writing, the Oculus VR SDK
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

************************************************************************************/
#ifndef OVR_Atomic_h
#define OVR_Atomic_h

#include "OVR_Types.h"

// Include System thread functionality.
#if defined(OVR_OS_MS) && !defined(OVR_OS_MS_MOBILE)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <Windows.h>
#else
#include <pthread.h>
#endif

#ifdef OVR_CC_MSVC
#include <intrin.h>
#pragma intrinsic(_ReadBarrier, _WriteBarrier, _ReadWriteBarrier)
#endif
namespace OVR {

// ****** Declared classes

// If there is NO thread support we implement AtomicOps and
// Lock objects as no-ops. The other classes are not defined.
template<class C> class AtomicOps;
template<class T> class AtomicInt;
template<class T> class AtomicPtr;

class Lock;
//-----------------------------------------------------------------------------------
// ***** AtomicOps

// Atomic operations are provided by the AtomicOps template class,
// implemented through system-specific AtomicOpsRaw specializations.
// It provides several fundamental operations such as Exchange, ExchangeAdd,
// CompareAndSet, and Store_Release. Each function includes several memory
// synchronization versions, important for multiprocessing CPUs with weak
// memory consistency. The following memory fencing strategies are supported:
//
//  - NoSync.  No memory synchronization is done for the atomic op.
//  - Release. All other memory writes are completed before the atomic op
//             writes its results.
//  - Acquire. Further memory reads are forced to wait until the atomic op
//             executes, guaranteeing that the right values will be seen.
//  - Sync.    A combination of Release and Acquire.
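
// Illustrative usage (editorial sketch, not part of the SDK declarations
// below): a writer publishes a flag with Release semantics and a reader
// observes it with Acquire semantics, so writes made before the store are
// visible after the load.
//
//     volatile int sharedFlag = 0;   // hypothetical shared variable
//
//     // Writer thread: all prior writes complete before the flag is set.
//     AtomicOps<int>::Store_Release(&sharedFlag, 1);
//
//     // Reader thread: later reads wait until the flag has been loaded.
//     if (AtomicOps<int>::Load_Acquire(&sharedFlag) == 1)
//     { /* data published before the store is now safe to read */ }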

// *** AtomicOpsRaw

// AtomicOpsRaw is a specialized template that provides atomic operations
// used by AtomicOps. This class has two fundamental qualities: (1) it
// defines a type T of correct size, and (2) provides operations that work
// atomically, such as Exchange_Sync and CompareAndSet_Release.

// AtomicOpsRawBase class contains shared constants/classes for AtomicOpsRaw.
// The primary thing it does is define sync class objects, whose destructor and
// constructor provide places to insert appropriate synchronization calls, on
// systems where such calls are necessary. So far, the breakdown is as follows:
//
//  - X86 systems don't need custom syncs, since their exchange/atomic
//    instructions are implicitly synchronized.
//  - PowerPC requires lwsync/isync instructions that can use this mechanism.
//  - If some other systems require a mechanism where syncing type is associated
//    with a particular instruction, the default implementation (which implements
//    all Sync, Acquire, and Release modes in terms of NoSync and fence) may not
//    work. In that case it will need to be #ifdef-ed conditionally.

struct AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS) || defined(OVR_CPU_X86) || defined(OVR_CPU_X86_64)
    // Need to have empty constructor to avoid class 'unused' variable warning.
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("isync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("isync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_MIPS)
    struct FullSync { inline FullSync() { asm volatile("sync\n"); } ~FullSync() { asm volatile("sync\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("sync\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("sync\n"); } };

#elif defined(OVR_CPU_ARM) // Includes Android and iOS.
    struct FullSync { inline FullSync() { asm volatile("dmb\n"); } ~FullSync() { asm volatile("dmb\n"); } };
    struct AcquireSync { inline AcquireSync() { } ~AcquireSync() { asm volatile("dmb\n"); } };
    struct ReleaseSync { inline ReleaseSync() { asm volatile("dmb\n"); } };

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4)
    // __sync functions are already full sync
    struct FullSync { inline FullSync() { } };
    struct AcquireSync { inline AcquireSync() { } };
    struct ReleaseSync { inline ReleaseSync() { } };
#endif
};
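
// Editorial note: AtomicOpsRaw_DefImpl (further below) composes these guards
// with the NoSync primitives; a fenced op is simply a NoSync op bracketed by
// a sync object whose constructor/destructor emit any fences the platform
// needs. Sketch of the pattern:
//
//     inline static T Exchange_Release(volatile T* p, T val)
//     { ReleaseSync sync; OVR_UNUSED(sync); return Exchange_NoSync(p, val); }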

// 4-Byte raw data atomic op implementation class.
struct AtomicOpsRaw_4ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint32_t T;

    // *** Thread-Safe Atomic Versions.

#elif defined(OVR_OS_MS)

    // Use special types defined for VC6, where volatile is not used and
    // InterlockedCompareExchange is declared incorrectly.
    typedef LONG T;
#if defined(OVR_CC_MSVC) && (OVR_CC_MSVC < 1300)
    typedef T* InterlockTPtr;
    typedef LPVOID ET;
    typedef ET* InterlockETPtr;
#else
    typedef volatile T* InterlockTPtr;
    typedef T ET;
    typedef InterlockTPtr InterlockETPtr;
#endif
    inline static T    Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange((InterlockETPtr)p, (ET)val, (ET)c) == (ET)c; }

#elif defined(OVR_CPU_PPC64) || defined(OVR_CPU_PPC)
    typedef uint32_t T;
    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "stwcx. %[j],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t dummy, ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stwcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "lwarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stwcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc", "memory");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CPU_MIPS)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll  %[r],0(%[i])\n\t"
                     "sc  %[j],0(%[i])\n\t"
                     "beq %[j],$0,1b\n\t"
                     "nop \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret;

        asm volatile("1:\n\t"
                     "ll   %[r],0(%[i])\n\t"
                     "addu %[j],%[r],%[j]\n\t"
                     "sc   %[j],0(%[i])\n\t"
                     "beq  %[j],$0,1b\n\t"
                     "nop \n"
                     : "+m" (*i), [r] "=&d" (ret) : [i] "d" (i), [j] "d" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "move %[r],$0\n\t"
                     "ll   %[o],0(%[i])\n\t"
                     "bne  %[o],%[c],2f\n\t"
                     "move %[r],%[v]\n\t"
                     "sc   %[r],0(%[i])\n\t"
                     "beq  %[r],$0,1b\n\t"
                     "nop \n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&d" (ret), [o] "=&d" (dummy) : [i] "d" (i), [c] "d" (c), [v] "d" (value)
                     : "cc", "memory");

        return ret;
    }

#elif defined(OVR_CPU_ARM) && defined(OVR_CC_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(j, i) == 0)
                return r;
        }
    }
    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (__strex(r + j, i) == 0)
                return r;
        }
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        for(;;)
        {
            T r = __ldrex(i);
            if (r != c)
                return 0;
            if (__strex(value, i) == 0)
                return 1;
        }
    }

#elif defined(OVR_CPU_ARM)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "strex %[t],%[j],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [t] "=&r" (dummy) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        uint32_t ret, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[r],[%[i]]\n\t"
                     "add   %[o],%[r],%[j]\n\t"
                     "strex %[t],%[o],[%[i]]\n\t"
                     "cmp   %[t],#0\n\t"
                     "bne   1b\n\t"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [j] "r" (j) : "cc", "memory");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret = 1, dummy, test;

        asm volatile("1:\n\t"
                     "ldrex %[o],[%[i]]\n\t"
                     "cmp   %[o],%[c]\n\t"
                     "bne   2f\n\t"
                     "strex %[r],%[v],[%[i]]\n\t"
                     "cmp   %[r],#0\n\t"
                     "bne   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&r" (ret), [o] "=&r" (dummy), [t] "=&r" (test) : [i] "r" (i), [c] "r" (c), [v] "r" (value)
                     : "cc", "memory");

        return !ret;
    }

#elif defined(OVR_CPU_X86)
    typedef uint32_t T;

    static inline uint32_t Exchange_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("xchgl %1,%[i]\n"
                     : "+m" (*i), "=q" (j) : [i] "m" (*i), "1" (j) : "cc", "memory");

        return j;
    }

    static inline uint32_t ExchangeAdd_NoSync(volatile uint32_t *i, uint32_t j)
    {
        asm volatile("lock; xaddl %1,%[i]\n"
                     : "+m" (*i), "+q" (j) : [i] "m" (*i) : "cc", "memory");

        return j;
    }

    static inline bool CompareAndSet_NoSync(volatile uint32_t *i, uint32_t c, uint32_t value)
    {
        uint32_t ret;

        asm volatile("lock; cmpxchgl %[v],%[i]\n"
                     : "+m" (*i), "=a" (ret) : [i] "m" (*i), "1" (c), [v] "q" (value) : "cc", "memory");

        return (ret == c);
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint32_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};

// 8-Byte raw data atomic op implementation class.
// Currently an implementation is provided only on systems with 64-bit pointers.
struct AtomicOpsRaw_8ByteImpl : public AtomicOpsRawBase
{
#if !defined(OVR_64BIT_POINTERS) || !defined(OVR_ENABLE_THREADS)

    // Provide a type for no-thread-support cases. Used by AtomicOpsRaw_DefImpl.
    typedef uint64_t T;

    // *** Thread-Safe OS-Specific Versions.

#elif defined(OVR_OS_MS)

    // This is only for 64-bit systems.
    typedef LONG64 T;
    typedef volatile T* InterlockTPtr;
    inline static T    Exchange_NoSync(volatile T* p, T val)           { return InterlockedExchange64((InterlockTPtr)p, val); }
    inline static T    ExchangeAdd_NoSync(volatile T* p, T val)        { return InterlockedExchangeAdd64((InterlockTPtr)p, val); }
    inline static bool CompareAndSet_NoSync(volatile T* p, T c, T val) { return InterlockedCompareExchange64((InterlockTPtr)p, val, c) == c; }

#elif defined(OVR_CPU_PPC64)

    typedef uint64_t T;

    static inline uint64_t Exchange_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "mr     %[o],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline uint64_t ExchangeAdd_NoSync(volatile uint64_t *i, uint64_t j)
    {
        uint64_t dummy, ret;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "add    %[o],%[r],%[j]\n\t"
                     "stdcx. %[o],0,%[i]\n\t"
                     "bne-   1b\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [j] "b" (j) : "cc");

        return ret;
    }

    static inline bool CompareAndSet_NoSync(volatile uint64_t *i, uint64_t c, uint64_t value)
    {
        uint64_t ret, dummy;

        asm volatile("1:\n\t"
                     "ldarx  %[r],0,%[i]\n\t"
                     "cmpw   0,%[r],%[cmp]\n\t"
                     "mfcr   %[r]\n\t"
                     "bne-   2f\n\t"
                     "stdcx. %[val],0,%[i]\n\t"
                     "bne-   1b\n\t"
                     "2:\n"
                     : "+m" (*i), [r] "=&b" (ret), [o] "=&r" (dummy) : [i] "b" (i), [cmp] "b" (c), [val] "b" (value) : "cc");

        return (ret & 0x20000000) ? 1 : 0;
    }

#elif defined(OVR_CC_GNU) && (__GNUC__ >= 4 && __GNUC_MINOR__ >= 1)

    typedef uint64_t T;

    static inline T Exchange_NoSync(volatile T *i, T j)
    {
        T v;
        do {
            v = *i;
        } while (!__sync_bool_compare_and_swap(i, v, j));
        return v;
    }

    static inline T ExchangeAdd_NoSync(volatile T *i, T j)
    {
        return __sync_fetch_and_add(i, j);
    }

    static inline bool CompareAndSet_NoSync(volatile T *i, T c, T value)
    {
        return __sync_bool_compare_and_swap(i, c, value);
    }

#endif // OS
};

// Default implementation for AtomicOpsRaw; provides implementations of mem-fenced
// atomic operations where fencing is done with a sync object wrapped around a NoSync
// operation implemented in the base class. If such an implementation is not possible
// on a given platform, #ifdefs can be used to disable it and then op functions can be
// implemented individually in the appropriate AtomicOpsRaw<size> class.

template<class O>
struct AtomicOpsRaw_DefImpl : public O
{
    typedef typename O::T           O_T;
    typedef typename O::FullSync    O_FullSync;
    typedef typename O::AcquireSync O_AcquireSync;
    typedef typename O::ReleaseSync O_ReleaseSync;

    // If there is no thread support, provide the default implementation. In this case,
    // the base class (O) must still provide the T declaration.
#ifndef OVR_ENABLE_THREADS

    // Atomic exchange of val with argument. Returns old val.
    inline static O_T Exchange_NoSync(volatile O_T* p, O_T val)    { O_T old = *p; *p = val; return old; }
    // Adds a new val to argument; returns its old val.
    inline static O_T ExchangeAdd_NoSync(volatile O_T* p, O_T val) { O_T old = *p; *p += val; return old; }
    // Compares the argument data with 'c' val.
    // If equal, stores val into '*p' and returns true; otherwise returns false.
    inline static bool CompareAndSet_NoSync(volatile O_T* p, O_T c, O_T val) { if (*p == c) { *p = val; return 1; } return 0; }

#endif

    // If a NoSync-wrapped implementation is not possible on a given platform,
    // this block should be replaced with per-function implementations in O.
    // Note the "AtomicOpsRaw_DefImpl<O>::" prefix in the calls below.
    inline static O_T  Exchange_Sync(volatile O_T* p, O_T val)                { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  Exchange_Release(volatile O_T* p, O_T val)             { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  Exchange_Acquire(volatile O_T* p, O_T val)             { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::Exchange_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Sync(volatile O_T* p, O_T val)             { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Release(volatile O_T* p, O_T val)          { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static O_T  ExchangeAdd_Acquire(volatile O_T* p, O_T val)          { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::ExchangeAdd_NoSync(p, val); }
    inline static bool CompareAndSet_Sync(volatile O_T* p, O_T c, O_T val)    { O_FullSync    sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Release(volatile O_T* p, O_T c, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }
    inline static bool CompareAndSet_Acquire(volatile O_T* p, O_T c, O_T val) { O_AcquireSync sync; OVR_UNUSED(sync); return AtomicOpsRaw_DefImpl<O>::CompareAndSet_NoSync(p, c, val); }

    // Loads and stores with memory fence. These have only the relevant versions.
#ifdef OVR_CPU_X86
    // On X86, Store_Release is implemented as exchange. Note that we can also
    // consider 'sfence' in the future, although it is not as compatible with older CPUs.
    inline static void Store_Release(volatile O_T* p, O_T val) { Exchange_Release(p, val); }
#else
    inline static void Store_Release(volatile O_T* p, O_T val) { O_ReleaseSync sync; OVR_UNUSED(sync); *p = val; }
#endif
    inline static O_T Load_Acquire(const volatile O_T* p)
    {
        O_AcquireSync sync;
        OVR_UNUSED(sync);

#if defined(OVR_CC_MSVC)
        _ReadBarrier(); // Compiler fence and load barrier
#elif defined(OVR_CC_INTEL)
        __memory_barrier(); // Compiler fence
#else
        // GCC-compatible:
        asm volatile ("" : : : "memory"); // Compiler fence
#endif

        return *p;
    }
};

template<int size>
struct AtomicOpsRaw : public AtomicOpsRawBase { };

template<>
struct AtomicOpsRaw<4> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>
{
    // Ensure that assigned type size is correct.
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_4ByteImpl>::T) == 4); }
};
template<>
struct AtomicOpsRaw<8> : public AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>
{
    AtomicOpsRaw()
    { OVR_COMPILER_ASSERT(sizeof(AtomicOpsRaw_DefImpl<AtomicOpsRaw_8ByteImpl>::T) == 8); }
};
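
// Illustrative sketch (editorial): the size-based specialization lets callers
// pick the raw implementation from any suitably sized type:
//
//     typedef AtomicOpsRaw<sizeof(uint32_t)> Raw32; // resolves to the 4-byte impl
//     volatile Raw32::T counter = 0;
//     Raw32::ExchangeAdd_Sync(&counter, 1);         // fully fenced atomic add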

// *** AtomicOps - implementation of atomic Ops for a specified class

// Implements atomic ops on a class, provided that the object is either
// 4 or 8 bytes in size (depending on the AtomicOpsRaw specializations
// available). Relies on AtomicOpsRaw for much of the implementation.

template<class C>
class AtomicOps
{
    typedef AtomicOpsRaw<sizeof(C)>   Ops;
    typedef typename Ops::T           T;
    typedef volatile typename Ops::T* PT;
    // We cast through unions to (1) avoid pointer size compiler warnings
    // and (2) ensure that there are no problems with strict pointer aliasing.
    union C2T_union { C c; T t; };

public:
    // General purpose implementation for standard syncs.
    inline static C    Exchange_Sync(volatile C* p, C val)              { C2T_union u; u.c = val; u.t = Ops::Exchange_Sync((PT)p, u.t); return u.c; }
    inline static C    Exchange_Release(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::Exchange_Release((PT)p, u.t); return u.c; }
    inline static C    Exchange_Acquire(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::Exchange_Acquire((PT)p, u.t); return u.c; }
    inline static C    Exchange_NoSync(volatile C* p, C val)            { C2T_union u; u.c = val; u.t = Ops::Exchange_NoSync((PT)p, u.t); return u.c; }
    inline static C    ExchangeAdd_Sync(volatile C* p, C val)           { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Sync((PT)p, u.t); return u.c; }
    inline static C    ExchangeAdd_Release(volatile C* p, C val)        { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Release((PT)p, u.t); return u.c; }
    inline static C    ExchangeAdd_Acquire(volatile C* p, C val)        { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_Acquire((PT)p, u.t); return u.c; }
    inline static C    ExchangeAdd_NoSync(volatile C* p, C val)         { C2T_union u; u.c = val; u.t = Ops::ExchangeAdd_NoSync((PT)p, u.t); return u.c; }
    inline static bool CompareAndSet_Sync(volatile C* p, C c, C val)    { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Sync((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Release(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Release((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_Acquire(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
    inline static bool CompareAndSet_NoSync(volatile C* p, C c, C val)  { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_NoSync((PT)p, cu.t, u.t); }

    // Loads and stores with memory fence. These have only the relevant versions.
    inline static void Store_Release(volatile C* p, C val) { C2T_union u; u.c = val; Ops::Store_Release((PT)p, u.t); }
    inline static C    Load_Acquire(const volatile C* p)   { C2T_union u; u.t = Ops::Load_Acquire((PT)p); return u.c; }

    // Deprecated misspelling of CompareAndSet_Release, kept for backward
    // compatibility; note that it actually forwards to CompareAndSet_Acquire.
    inline static bool CompareAndSet_Relse(volatile C* p, C c, C val) { C2T_union u, cu; u.c = val; cu.c = c; return Ops::CompareAndSet_Acquire((PT)p, cu.t, u.t); }
};
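
// Illustrative sketch (editorial): AtomicOps works on any 4- or 8-byte type,
// such as an enum (assumed 4 bytes here); C2T_union performs the bit-level
// conversion between C and the raw integer type:
//
//     enum State { State_Idle = 0, State_Busy = 1 }; // hypothetical type
//     volatile State state = State_Idle;
//     bool acquired = AtomicOps<State>::CompareAndSet_Sync(&state, State_Idle, State_Busy);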

// Atomic value base class - implements operations shared for integers and pointers.
template<class T>
class AtomicValueBase
{
protected:
    typedef AtomicOps<T> Ops;
public:

    volatile T Value;

    inline AtomicValueBase()               { }
    explicit inline AtomicValueBase(T val) { Ops::Store_Release(&Value, val); }

    // Most libraries (TBB and Joshua Scholar's) do not do a Load_Acquire
    // here, since most algorithms do not require atomic loads. Needs some research.
    inline operator T() const { return Value; }

    // *** Standard Atomic inlines
    inline T    Exchange_Sync(T val)              { return Ops::Exchange_Sync(&Value, val); }
    inline T    Exchange_Release(T val)           { return Ops::Exchange_Release(&Value, val); }
    inline T    Exchange_Acquire(T val)           { return Ops::Exchange_Acquire(&Value, val); }
    inline T    Exchange_NoSync(T val)            { return Ops::Exchange_NoSync(&Value, val); }
    inline bool CompareAndSet_Sync(T c, T val)    { return Ops::CompareAndSet_Sync(&Value, c, val); }
    inline bool CompareAndSet_Release(T c, T val) { return Ops::CompareAndSet_Release(&Value, c, val); }
    inline bool CompareAndSet_Acquire(T c, T val) { return Ops::CompareAndSet_Acquire(&Value, c, val); }
    inline bool CompareAndSet_NoSync(T c, T val)  { return Ops::CompareAndSet_NoSync(&Value, c, val); }
    // Load & Store.
    inline void Store_Release(T val) { Ops::Store_Release(&Value, val); }
    inline T    Load_Acquire() const { return Ops::Load_Acquire(&Value); }
};

// ***** AtomicPtr - Atomic pointer template

// This pointer class supports atomic assignments with release,
// increment / decrement operations, and conditional compare + set.

template<class T>
class AtomicPtr : public AtomicValueBase<T*>
{
    typedef typename AtomicValueBase<T*>::Ops Ops;

public:
    // Initialize pointer value to 0 by default; use Store_Release only with explicit constructor.
    inline AtomicPtr() : AtomicValueBase<T*>()                   { this->Value = 0; }
    explicit inline AtomicPtr(T* val) : AtomicValueBase<T*>(val) { }

    // Pointer access.
    inline T* operator -> () const { return this->Load_Acquire(); }

    // It is convenient for this conversion to have Load_Acquire semantics,
    // since that suits algorithms such as linked-list traversals where nodes
    // can be added by another thread.
    inline operator T* () const { return this->Load_Acquire(); }

    // *** Standard Atomic inlines (applicable to pointers)

    // ExchangeAdd considers pointer size for pointers.
    template<class I>
    inline T* ExchangeAdd_Sync(I incr)    { return Ops::ExchangeAdd_Sync(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Release(I incr) { return Ops::ExchangeAdd_Release(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_Acquire(I incr) { return Ops::ExchangeAdd_Acquire(&this->Value, ((T*)0) + incr); }
    template<class I>
    inline T* ExchangeAdd_NoSync(I incr)  { return Ops::ExchangeAdd_NoSync(&this->Value, ((T*)0) + incr); }

    // *** Atomic Operators

    inline T* operator = (T* val) { this->Store_Release(val); return val; }

    template<class I>
    inline T* operator += (I val) { return ExchangeAdd_Sync(val) + val; }
    template<class I>
    inline T* operator -= (I val) { return operator += (-val); }

    inline T* operator ++ ()    { return ExchangeAdd_Sync(1) + 1; }
    inline T* operator -- ()    { return ExchangeAdd_Sync(-1) - 1; }
    inline T* operator ++ (int) { return ExchangeAdd_Sync(1); }
    inline T* operator -- (int) { return ExchangeAdd_Sync(-1); }
};
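
// Illustrative sketch (editorial): publishing a node through an AtomicPtr so
// that a reader never observes a partially constructed object:
//
//     struct Node { int value; };   // hypothetical payload type
//     AtomicPtr<Node> head;         // pointer value initialized to 0
//     head = new Node();            // operator= stores with Release semantics
//     Node* n = head;               // conversion operator loads with Acquire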

// ***** AtomicInt - Atomic integer template

// Implements an atomic integer type; the exact type to use is provided
// as an argument. Supports atomic Acquire / Release semantics, atomic
// arithmetic operations, and atomic conditional compare + set.

template<class T>
class AtomicInt : public AtomicValueBase<T>
{
    typedef typename AtomicValueBase<T>::Ops Ops;

public:
    inline AtomicInt() : AtomicValueBase<T>()                  { }
    explicit inline AtomicInt(T val) : AtomicValueBase<T>(val) { }

    // *** Standard Atomic inlines (applicable to int)
    inline T ExchangeAdd_Sync(T val)    { return Ops::ExchangeAdd_Sync(&this->Value, val); }
    inline T ExchangeAdd_Release(T val) { return Ops::ExchangeAdd_Release(&this->Value, val); }
    inline T ExchangeAdd_Acquire(T val) { return Ops::ExchangeAdd_Acquire(&this->Value, val); }
    inline T ExchangeAdd_NoSync(T val)  { return Ops::ExchangeAdd_NoSync(&this->Value, val); }
    // These increments could be more efficient because they don't return a value.
    inline void Increment_Sync()    { ExchangeAdd_Sync((T)1); }
    inline void Increment_Release() { ExchangeAdd_Release((T)1); }
    inline void Increment_Acquire() { ExchangeAdd_Acquire((T)1); }
    inline void Increment_NoSync()  { ExchangeAdd_NoSync((T)1); }

    // *** Atomic Operators

    inline T operator =  (T val) { this->Store_Release(val); return val; }
    inline T operator += (T val) { return ExchangeAdd_Sync(val) + val; }
    inline T operator -= (T val) { return ExchangeAdd_Sync(0 - val) - val; }

    inline T operator ++ ()    { return ExchangeAdd_Sync((T)1) + 1; }
    inline T operator -- ()    { return ExchangeAdd_Sync(((T)0)-1) - 1; }
    inline T operator ++ (int) { return ExchangeAdd_Sync((T)1); }
    inline T operator -- (int) { return ExchangeAdd_Sync(((T)0)-1); }

    // More complex atomic operations. Leave it to the compiler whether to optimize them or not.
    T operator &= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp & arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator |= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp | arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator ^= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp ^ arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator *= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp * arg;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }

    T operator /= (T arg)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp / arg;
        } while(!this->CompareAndSet_Sync(comp, newVal)); // 'this->' needed for dependent-base lookup
        return newVal;
    }

    T operator >>= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp >> bits;
        } while(!this->CompareAndSet_Sync(comp, newVal)); // 'this->' needed for dependent-base lookup
        return newVal;
    }

    T operator <<= (unsigned bits)
    {
        T comp, newVal;
        do {
            comp   = this->Value;
            newVal = comp << bits;
        } while(!this->CompareAndSet_Sync(comp, newVal));
        return newVal;
    }
};
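
// Illustrative sketch (editorial): AtomicInt as a thread-safe reference count.
//
//     AtomicInt<int> refCount(1);   // constructor stores with Release
//     refCount++;                   // atomic post-increment (Sync)
//     if (--refCount == 0)          // atomic pre-decrement returns the new value
//     { /* last reference released; safe to destroy the object */ }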

//-----------------------------------------------------------------------------------
// ***** Lock

// Lock is the simplest and most efficient mutual-exclusion lock class.
// Unlike Mutex, it cannot be waited on.

class Lock
{
    // NOTE: Locks are not allocatable and they themselves should not allocate
    // memory by standard means. This is the case because StandardAllocator
    // relies on this class.
    // Make 'delete' private. Don't do this for 'new' since it can be redefined.
    void operator delete(void*) {}

    // *** Lock implementation for various platforms.

#if !defined(OVR_ENABLE_THREADS)

public:
    // With no thread support, lock does nothing.
    inline Lock() { }
    inline Lock(unsigned) { }
    inline ~Lock() { }
    inline void DoLock() { }
    inline void Unlock() { }

    // Windows.
#elif defined(OVR_OS_MS)

    CRITICAL_SECTION cs;
public:
    Lock(unsigned spinCount = 10000); // Mutexes with non-zero spin counts usually result in better performance.
    ~Lock();
    // Locking functions.
    inline void DoLock() { ::EnterCriticalSection(&cs); }
    inline void Unlock() { ::LeaveCriticalSection(&cs); }

#else
    pthread_mutex_t mutex;

public:
    static pthread_mutexattr_t RecursiveAttr;
    static bool                RecursiveAttrInit;

    Lock (unsigned spinCount = 0) // To do: Support spin count, probably via a custom lock implementation.
    {
        OVR_UNUSED(spinCount);
        if (!RecursiveAttrInit)
        {
            pthread_mutexattr_init(&RecursiveAttr);
            pthread_mutexattr_settype(&RecursiveAttr, PTHREAD_MUTEX_RECURSIVE);
            RecursiveAttrInit = 1;
        }
        pthread_mutex_init(&mutex, &RecursiveAttr);
    }
    ~Lock ()             { pthread_mutex_destroy(&mutex); }
    inline void DoLock() { pthread_mutex_lock(&mutex); }
    inline void Unlock() { pthread_mutex_unlock(&mutex); }

#endif // OVR_ENABLE_THREADS

public:
    // Locker class, used for automatic locking
    class Locker
    {
    public:
        Lock *pLock;
        inline Locker(Lock *plock)
        { pLock = plock; pLock->DoLock(); }
        inline ~Locker()
        { pLock->Unlock(); }
    };
};
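
// Illustrative sketch (editorial): scoped locking with Lock::Locker. The
// constructor calls DoLock() and the destructor calls Unlock(), so the lock
// is released on every path out of the block:
//
//     Lock queueLock;               // hypothetical shared lock
//     void PushItem(/*...*/)
//     {
//         Lock::Locker guard(&queueLock);
//         /* ... mutate the shared queue ... */
//     }                             // unlocked here, even on early return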

//-------------------------------------------------------------------------------------
// Globally shared Lock implementation used for MessageHandlers, etc.

class SharedLock
{
public:
    SharedLock() : UseCount(0) {}

    Lock* GetLockAddRef();
    void  ReleaseLock(Lock* plock);

private:
    Lock* toLock() { return (Lock*)Buffer; }

    // UseCount and max alignment.
    volatile int UseCount;
    uint64_t     Buffer[(sizeof(Lock) + sizeof(uint64_t) - 1) / sizeof(uint64_t)];
};
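
// Illustrative sketch (editorial; GetLockAddRef/ReleaseLock are defined
// elsewhere in the kernel sources, and the exact construction policy inside
// Buffer is an implementation detail):
//
//     SharedLock shared;
//     Lock* plock = shared.GetLockAddRef();    // ref-counted access to the Lock
//     { Lock::Locker guard(plock); /* ... */ }
//     shared.ReleaseLock(plock);               // drop the reference when done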

} // OVR

#endif