ovr_sdk

view LibOVR/Src/Kernel/OVR_String.h @ 0:1b39a1b46319

initial 0.4.4
author John Tsiombikas <nuclear@member.fsf.org>
date Wed, 14 Jan 2015 06:51:16 +0200
parents
children
line source
1 /************************************************************************************
3 PublicHeader: OVR_Kernel.h
4 Filename : OVR_String.h
5 Content : String UTF8 string implementation with copy-on-write semantics
6 (thread-safe for assignment but not modification).
7 Created : September 19, 2012
8 Notes :
10 Copyright : Copyright 2014 Oculus VR, LLC All Rights reserved.
12 Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License");
13 you may not use the Oculus VR Rift SDK except in compliance with the License,
14 which is provided at the time of installation or download, or which
15 otherwise accompanies this software in either electronic or hard copy form.
17 You may obtain a copy of the License at
19 http://www.oculusvr.com/licenses/LICENSE-3.2
21 Unless required by applicable law or agreed to in writing, the Oculus VR SDK
22 distributed under the License is distributed on an "AS IS" BASIS,
23 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 See the License for the specific language governing permissions and
25 limitations under the License.
27 ************************************************************************************/
29 #ifndef OVR_String_h
30 #define OVR_String_h
32 #include "OVR_Types.h"
33 #include "OVR_Allocator.h"
34 #include "OVR_UTF8Util.h"
35 #include "OVR_Atomic.h"
36 #include "OVR_Std.h"
37 #include "OVR_Alg.h"
39 namespace OVR {
41 // ***** Classes
43 class String;
44 class StringBuffer;
47 //-----------------------------------------------------------------------------------
48 // ***** String Class
50 // String is UTF8 based string class with copy-on-write implementation
51 // for assignment.
53 class String
54 {
55 protected:
57 enum FlagConstants
58 {
59 //Flag_GetLength = 0x7FFFFFFF,
60 // This flag is set if GetLength() == GetSize() for a string.
61 // Avoid extra scanning is Substring and indexing logic.
62 Flag_LengthIsSizeShift = (sizeof(size_t)*8 - 1)
63 };
66 // Internal structure to hold string data
67 struct DataDesc
68 {
69 // Number of bytes. Will be the same as the number of chars if the characters
70 // are ascii, may not be equal to number of chars in case string data is UTF8.
71 size_t Size;
72 volatile int32_t RefCount;
73 char Data[1];
75 void AddRef()
76 {
77 AtomicOps<int32_t>::ExchangeAdd_NoSync(&RefCount, 1);
78 }
79 // Decrement ref count. This needs to be thread-safe, since
80 // a different thread could have also decremented the ref count.
81 // For example, if u start off with a ref count = 2. Now if u
82 // decrement the ref count and check against 0 in different
83 // statements, a different thread can also decrement the ref count
84 // in between our decrement and checking against 0 and will find
85 // the ref count = 0 and delete the object. This will lead to a crash
86 // when context switches to our thread and we'll be trying to delete
87 // an already deleted object. Hence decrementing the ref count and
88 // checking against 0 needs to made an atomic operation.
89 void Release()
90 {
91 if ((AtomicOps<int32_t>::ExchangeAdd_NoSync(&RefCount, -1) - 1) == 0)
92 OVR_FREE(this);
93 }
95 static size_t GetLengthFlagBit() { return size_t(1) << Flag_LengthIsSizeShift; }
96 size_t GetSize() const { return Size & ~GetLengthFlagBit() ; }
97 size_t GetLengthFlag() const { return Size & GetLengthFlagBit(); }
98 bool LengthIsSize() const { return GetLengthFlag() != 0; }
99 };
101 // Heap type of the string is encoded in the lower bits.
102 enum HeapType
103 {
104 HT_Global = 0, // Heap is global.
105 HT_Local = 1, // SF::String_loc: Heap is determined based on string's address.
106 HT_Dynamic = 2, // SF::String_temp: Heap is stored as a part of the class.
107 HT_Mask = 3
108 };
110 union {
111 DataDesc* pData;
112 size_t HeapTypeBits;
113 };
114 typedef union {
115 DataDesc* pData;
116 size_t HeapTypeBits;
117 } DataDescUnion;
119 inline HeapType GetHeapType() const { return (HeapType) (HeapTypeBits & HT_Mask); }
121 inline DataDesc* GetData() const
122 {
123 DataDescUnion u;
124 u.pData = pData;
125 u.HeapTypeBits = (u.HeapTypeBits & ~(size_t)HT_Mask);
126 return u.pData;
127 }
129 inline void SetData(DataDesc* pdesc)
130 {
131 HeapType ht = GetHeapType();
132 pData = pdesc;
133 OVR_ASSERT((HeapTypeBits & HT_Mask) == 0);
134 HeapTypeBits |= ht;
135 }
138 DataDesc* AllocData(size_t size, size_t lengthIsSize);
139 DataDesc* AllocDataCopy1(size_t size, size_t lengthIsSize,
140 const char* pdata, size_t copySize);
141 DataDesc* AllocDataCopy2(size_t size, size_t lengthIsSize,
142 const char* pdata1, size_t copySize1,
143 const char* pdata2, size_t copySize2);
145 // Special constructor to avoid data initalization when used in derived class.
146 struct NoConstructor { };
147 String(const NoConstructor&) { }
149 public:
151 // For initializing string with dynamic buffer
152 struct InitStruct
153 {
154 virtual ~InitStruct() { }
155 virtual void InitString(char* pbuffer, size_t size) const = 0;
156 };
159 // Constructors / Destructors.
160 String();
161 String(const char* data);
162 String(const char* data1, const char* pdata2, const char* pdata3 = 0);
163 String(const char* data, size_t buflen);
164 String(const String& src);
165 String(const StringBuffer& src);
166 String(const InitStruct& src, size_t size);
167 explicit String(const wchar_t* data);
169 // Destructor (Captain Obvious guarantees!)
170 ~String()
171 {
172 GetData()->Release();
173 }
175 // Declaration of NullString
176 static DataDesc NullData;
179 // *** General Functions
181 void Clear();
183 // For casting to a pointer to char.
184 operator const char*() const { return GetData()->Data; }
185 // Pointer to raw buffer.
186 const char* ToCStr() const { return GetData()->Data; }
188 // Returns number of bytes
189 size_t GetSize() const { return GetData()->GetSize() ; }
190 // Tells whether or not the string is empty
191 bool IsEmpty() const { return GetSize() == 0; }
193 // Returns number of characters
194 size_t GetLength() const;
195 int GetLengthI() const { return (int)GetLength(); }
197 // Returns character at the specified index
198 uint32_t GetCharAt(size_t index) const;
199 uint32_t GetFirstCharAt(size_t index, const char** offset) const;
200 uint32_t GetNextChar(const char** offset) const;
202 // Appends a character
203 void AppendChar(uint32_t ch);
205 // Append a string
206 void AppendString(const wchar_t* pstr, intptr_t len = -1);
207 void AppendString(const char* putf8str, intptr_t utf8StrSz = -1);
209 // Assigned a string with dynamic data (copied through initializer).
210 void AssignString(const InitStruct& src, size_t size);
211 // Assigns string with known size.
212 void AssignString(const char* putf8str, size_t size);
214 // Resize the string to the new size
215 // void Resize(size_t _size);
217 // Removes the character at posAt
218 void Remove(size_t posAt, intptr_t len = 1);
220 // Returns a String that's a substring of this.
221 // -start is the index of the first UTF8 character you want to include.
222 // -end is the index one past the last UTF8 character you want to include.
223 String Substring(size_t start, size_t end) const;
225 // Case-conversion
226 String ToUpper() const;
227 String ToLower() const;
229 // Inserts substr at posAt
230 String& Insert (const char* substr, size_t posAt, intptr_t len = -1);
232 // Inserts character at posAt
233 size_t InsertCharAt(uint32_t c, size_t posAt);
235 // Inserts substr at posAt, which is an index of a character (not byte).
236 // Of size is specified, it is in bytes.
237 // String& Insert(const uint32_t* substr, size_t posAt, intptr_t size = -1);
239 // Get Byte index of the character at position = index
240 size_t GetByteIndex(size_t index) const { return (size_t)UTF8Util::GetByteIndex(index, GetData()->Data); }
242 // Utility: case-insensitive string compare. stricmp() & strnicmp() are not
243 // ANSI or POSIX, do not seem to appear in Linux.
244 static int OVR_STDCALL CompareNoCase(const char* a, const char* b);
245 static int OVR_STDCALL CompareNoCase(const char* a, const char* b, intptr_t len);
247 // Hash function, case-insensitive
248 static size_t OVR_STDCALL BernsteinHashFunctionCIS(const void* pdataIn, size_t size, size_t seed = 5381);
250 // Hash function, case-sensitive
251 static size_t OVR_STDCALL BernsteinHashFunction(const void* pdataIn, size_t size, size_t seed = 5381);
254 // ***** File path parsing helper functions.
255 // Implemented in OVR_String_FilePath.cpp.
257 // Absolute paths can star with:
258 // - protocols: 'file://', 'http://'
259 // - windows drive: 'c:\'
260 // - UNC share name: '\\share'
261 // - unix root '/'
262 static bool HasAbsolutePath(const char* path);
263 static bool HasExtension(const char* path);
264 static bool HasProtocol(const char* path);
266 bool HasAbsolutePath() const { return HasAbsolutePath(ToCStr()); }
267 bool HasExtension() const { return HasExtension(ToCStr()); }
268 bool HasProtocol() const { return HasProtocol(ToCStr()); }
270 String GetProtocol() const; // Returns protocol, if any, with trailing '://'.
271 String GetPath() const; // Returns path with trailing '/'.
272 String GetFilename() const; // Returns filename, including extension.
273 String GetExtension() const; // Returns extension with a dot.
275 void StripProtocol(); // Strips front protocol, if any, from the string.
276 void StripExtension(); // Strips off trailing extension.
279 // Operators
280 // Assignment
281 void operator = (const char* str);
282 void operator = (const wchar_t* str);
283 void operator = (const String& src);
284 void operator = (const StringBuffer& src);
286 // Addition
287 void operator += (const String& src);
288 void operator += (const char* psrc) { AppendString(psrc); }
289 void operator += (const wchar_t* psrc) { AppendString(psrc); }
290 void operator += (char ch) { AppendChar(ch); }
291 String operator + (const char* str) const;
292 String operator + (const String& src) const;
294 // Comparison
295 bool operator == (const String& str) const
296 {
297 return (OVR_strcmp(GetData()->Data, str.GetData()->Data)== 0);
298 }
300 bool operator != (const String& str) const
301 {
302 return !operator == (str);
303 }
305 bool operator == (const char* str) const
306 {
307 return OVR_strcmp(GetData()->Data, str) == 0;
308 }
310 bool operator != (const char* str) const
311 {
312 return !operator == (str);
313 }
315 bool operator < (const char* pstr) const
316 {
317 return OVR_strcmp(GetData()->Data, pstr) < 0;
318 }
320 bool operator < (const String& str) const
321 {
322 return *this < str.GetData()->Data;
323 }
325 bool operator > (const char* pstr) const
326 {
327 return OVR_strcmp(GetData()->Data, pstr) > 0;
328 }
330 bool operator > (const String& str) const
331 {
332 return *this > str.GetData()->Data;
333 }
335 int CompareNoCase(const char* pstr) const
336 {
337 return CompareNoCase(GetData()->Data, pstr);
338 }
339 int CompareNoCase(const String& str) const
340 {
341 return CompareNoCase(GetData()->Data, str.ToCStr());
342 }
344 // Accesses raw bytes
345 const char& operator [] (int index) const
346 {
347 OVR_ASSERT(index >= 0 && (size_t)index < GetSize());
348 return GetData()->Data[index];
349 }
350 const char& operator [] (size_t index) const
351 {
352 OVR_ASSERT(index < GetSize());
353 return GetData()->Data[index];
354 }
357 // Case insensitive keys are used to look up insensitive string in hash tables
358 // for SWF files with version before SWF 7.
359 struct NoCaseKey
360 {
361 const String* pStr;
362 NoCaseKey(const String &str) : pStr(&str){};
363 };
365 bool operator == (const NoCaseKey& strKey) const
366 {
367 return (CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
368 }
369 bool operator != (const NoCaseKey& strKey) const
370 {
371 return !(CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
372 }
374 // Hash functor used for strings.
375 struct HashFunctor
376 {
377 size_t operator()(const String& data) const
378 {
379 size_t size = data.GetSize();
380 return String::BernsteinHashFunction((const char*)data, size);
381 }
382 };
383 // Case-insensitive hash functor used for strings. Supports additional
384 // lookup based on NoCaseKey.
385 struct NoCaseHashFunctor
386 {
387 size_t operator()(const String& data) const
388 {
389 size_t size = data.GetSize();
390 return String::BernsteinHashFunctionCIS((const char*)data, size);
391 }
392 size_t operator()(const NoCaseKey& data) const
393 {
394 size_t size = data.pStr->GetSize();
395 return String::BernsteinHashFunctionCIS((const char*)data.pStr->ToCStr(), size);
396 }
397 };
399 };
402 //-----------------------------------------------------------------------------------
403 // ***** String Buffer used for Building Strings
405 class StringBuffer
406 {
407 char* pData;
408 size_t Size;
409 size_t BufferSize;
410 size_t GrowSize;
411 mutable bool LengthIsSize;
413 public:
415 // Constructors / Destructor.
416 StringBuffer();
417 explicit StringBuffer(size_t growSize);
418 StringBuffer(const char* data);
419 StringBuffer(const char* data, size_t buflen);
420 StringBuffer(const String& src);
421 StringBuffer(const StringBuffer& src);
422 explicit StringBuffer(const wchar_t* data);
423 ~StringBuffer();
426 // Modify grow size used for growing/shrinking the buffer.
427 size_t GetGrowSize() const { return GrowSize; }
428 void SetGrowSize(size_t growSize);
431 // *** General Functions
432 // Does not release memory, just sets Size to 0
433 void Clear();
435 // For casting to a pointer to char.
436 operator const char*() const { return (pData) ? pData : ""; }
437 // Pointer to raw buffer.
438 const char* ToCStr() const { return (pData) ? pData : ""; }
440 // Returns number of bytes.
441 size_t GetSize() const { return Size ; }
442 // Tells whether or not the string is empty.
443 bool IsEmpty() const { return GetSize() == 0; }
445 // Returns number of characters
446 size_t GetLength() const;
448 // Returns character at the specified index
449 uint32_t GetCharAt(size_t index) const;
450 uint32_t GetFirstCharAt(size_t index, const char** offset) const;
451 uint32_t GetNextChar(const char** offset) const;
454 // Resize the string to the new size
455 void Resize(size_t _size);
456 void Reserve(size_t _size);
458 // Appends a character
459 void AppendChar(uint32_t ch);
461 // Append a string
462 void AppendString(const wchar_t* pstr, intptr_t len = -1);
463 void AppendString(const char* putf8str, intptr_t utf8StrSz = -1);
464 void AppendFormat(const char* format, ...);
466 // Assigned a string with dynamic data (copied through initializer).
467 //void AssignString(const InitStruct& src, size_t size);
469 // Inserts substr at posAt
470 void Insert (const char* substr, size_t posAt, intptr_t len = -1);
471 // Inserts character at posAt
472 size_t InsertCharAt(uint32_t c, size_t posAt);
474 // Assignment
475 void operator = (const char* str);
476 void operator = (const wchar_t* str);
477 void operator = (const String& src);
478 void operator = (const StringBuffer& src);
480 // Addition
481 void operator += (const String& src) { AppendString(src.ToCStr(),src.GetSize()); }
482 void operator += (const char* psrc) { AppendString(psrc); }
483 void operator += (const wchar_t* psrc) { AppendString(psrc); }
484 void operator += (char ch) { AppendChar(ch); }
485 //String operator + (const char* str) const ;
486 //String operator + (const String& src) const ;
488 // Accesses raw bytes
489 char& operator [] (int index)
490 {
491 OVR_ASSERT(((size_t)index) < GetSize());
492 return pData[index];
493 }
494 char& operator [] (size_t index)
495 {
496 OVR_ASSERT(index < GetSize());
497 return pData[index];
498 }
500 const char& operator [] (int index) const
501 {
502 OVR_ASSERT(((size_t)index) < GetSize());
503 return pData[index];
504 }
505 const char& operator [] (size_t index) const
506 {
507 OVR_ASSERT(index < GetSize());
508 return pData[index];
509 }
510 };
513 //
514 // Wrapper for string data. The data must have a guaranteed
515 // lifespan throughout the usage of the wrapper. Not intended for
516 // cached usage. Not thread safe.
517 //
518 class StringDataPtr
519 {
520 public:
521 StringDataPtr() : pStr(NULL), Size(0) {}
522 StringDataPtr(const StringDataPtr& p)
523 : pStr(p.pStr), Size(p.Size) {}
524 StringDataPtr(const char* pstr, size_t sz)
525 : pStr(pstr), Size(sz) {}
526 StringDataPtr(const char* pstr)
527 : pStr(pstr), Size((pstr != NULL) ? OVR_strlen(pstr) : 0) {}
528 explicit StringDataPtr(const String& str)
529 : pStr(str.ToCStr()), Size(str.GetSize()) {}
530 template <typename T, int N>
531 StringDataPtr(const T (&v)[N])
532 : pStr(v), Size(N) {}
534 public:
535 const char* ToCStr() const { return pStr; }
536 size_t GetSize() const { return Size; }
537 bool IsEmpty() const { return GetSize() == 0; }
539 // value is a prefix of this string
540 // Character's values are not compared.
541 bool IsPrefix(const StringDataPtr& value) const
542 {
543 return ToCStr() == value.ToCStr() && GetSize() >= value.GetSize();
544 }
545 // value is a suffix of this string
546 // Character's values are not compared.
547 bool IsSuffix(const StringDataPtr& value) const
548 {
549 return ToCStr() <= value.ToCStr() && (End()) == (value.End());
550 }
552 // Find first character.
553 // init_ind - initial index.
554 intptr_t FindChar(char c, size_t init_ind = 0) const
555 {
556 for (size_t i = init_ind; i < GetSize(); ++i)
557 if (pStr[i] == c)
558 return static_cast<intptr_t>(i);
560 return -1;
561 }
563 // Find last character.
564 // init_ind - initial index.
565 intptr_t FindLastChar(char c, size_t init_ind = ~0) const
566 {
567 if (init_ind == (size_t)~0 || init_ind > GetSize())
568 init_ind = GetSize();
569 else
570 ++init_ind;
572 for (size_t i = init_ind; i > 0; --i)
573 if (pStr[i - 1] == c)
574 return static_cast<intptr_t>(i - 1);
576 return -1;
577 }
579 // Create new object and trim size bytes from the left.
580 StringDataPtr GetTrimLeft(size_t size) const
581 {
582 // Limit trim size to the size of the string.
583 size = Alg::PMin(GetSize(), size);
585 return StringDataPtr(ToCStr() + size, GetSize() - size);
586 }
587 // Create new object and trim size bytes from the right.
588 StringDataPtr GetTrimRight(size_t size) const
589 {
590 // Limit trim to the size of the string.
591 size = Alg::PMin(GetSize(), size);
593 return StringDataPtr(ToCStr(), GetSize() - size);
594 }
596 // Create new object, which contains next token.
597 // Useful for parsing.
598 StringDataPtr GetNextToken(char separator = ':') const
599 {
600 size_t cur_pos = 0;
601 const char* cur_str = ToCStr();
603 for (; cur_pos < GetSize() && cur_str[cur_pos]; ++cur_pos)
604 {
605 if (cur_str[cur_pos] == separator)
606 {
607 break;
608 }
609 }
611 return StringDataPtr(ToCStr(), cur_pos);
612 }
614 // Trim size bytes from the left.
615 StringDataPtr& TrimLeft(size_t size)
616 {
617 // Limit trim size to the size of the string.
618 size = Alg::PMin(GetSize(), size);
619 pStr += size;
620 Size -= size;
622 return *this;
623 }
624 // Trim size bytes from the right.
625 StringDataPtr& TrimRight(size_t size)
626 {
627 // Limit trim to the size of the string.
628 size = Alg::PMin(GetSize(), size);
629 Size -= size;
631 return *this;
632 }
634 const char* Begin() const { return ToCStr(); }
635 const char* End() const { return ToCStr() + GetSize(); }
637 // Hash functor used string data pointers
638 struct HashFunctor
639 {
640 size_t operator()(const StringDataPtr& data) const
641 {
642 return String::BernsteinHashFunction(data.ToCStr(), data.GetSize());
643 }
644 };
646 bool operator== (const StringDataPtr& data) const
647 {
648 return (OVR_strncmp(pStr, data.pStr, data.Size) == 0);
649 }
651 protected:
652 const char* pStr;
653 size_t Size;
654 };
656 } // OVR
658 #endif