oculus1
view libovr/Src/Kernel/OVR_String.h @ 6:d6dd6372add6
foo
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 15 Sep 2013 22:14:41 +0300 |
parents | e2f9e4603129 |
children |
line source
1 /************************************************************************************
3 PublicHeader: OVR.h
4 Filename : OVR_String.h
5 Content : String UTF8 string implementation with copy-on-write semantics
6 (thread-safe for assignment but not modification).
7 Created : September 19, 2012
8 Notes :
10 Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved.
12 Use of this software is subject to the terms of the Oculus license
13 agreement provided at the time of installation or download, or which
14 otherwise accompanies this software in either electronic or hard copy form.
16 ************************************************************************************/
18 #ifndef OVR_String_h
19 #define OVR_String_h
21 #include "OVR_Types.h"
22 #include "OVR_Allocator.h"
23 #include "OVR_UTF8Util.h"
24 #include "OVR_Atomic.h"
25 #include "OVR_Std.h"
26 #include "OVR_Alg.h"
28 namespace OVR {
30 // ***** Classes
32 class String;
33 class StringBuffer;
36 //-----------------------------------------------------------------------------------
37 // ***** String Class
39 // String is UTF8 based string class with copy-on-write implementation
40 // for assignment.
42 class String
43 {
44 protected:
46 enum FlagConstants
47 {
48 //Flag_GetLength = 0x7FFFFFFF,
49 // This flag is set if GetLength() == GetSize() for a string.
50 // Avoid extra scanning is Substring and indexing logic.
51 Flag_LengthIsSizeShift = (sizeof(UPInt)*8 - 1)
52 };
55 // Internal structure to hold string data
56 struct DataDesc
57 {
58 // Number of bytes. Will be the same as the number of chars if the characters
59 // are ascii, may not be equal to number of chars in case string data is UTF8.
60 UPInt Size;
61 volatile SInt32 RefCount;
62 char Data[1];
64 void AddRef()
65 {
66 AtomicOps<SInt32>::ExchangeAdd_NoSync(&RefCount, 1);
67 }
68 // Decrement ref count. This needs to be thread-safe, since
69 // a different thread could have also decremented the ref count.
70 // For example, if u start off with a ref count = 2. Now if u
71 // decrement the ref count and check against 0 in different
72 // statements, a different thread can also decrement the ref count
73 // in between our decrement and checking against 0 and will find
74 // the ref count = 0 and delete the object. This will lead to a crash
75 // when context switches to our thread and we'll be trying to delete
76 // an already deleted object. Hence decrementing the ref count and
77 // checking against 0 needs to made an atomic operation.
78 void Release()
79 {
80 if ((AtomicOps<SInt32>::ExchangeAdd_NoSync(&RefCount, -1) - 1) == 0)
81 OVR_FREE(this);
82 }
84 static UPInt GetLengthFlagBit() { return UPInt(1) << Flag_LengthIsSizeShift; }
85 UPInt GetSize() const { return Size & ~GetLengthFlagBit() ; }
86 UPInt GetLengthFlag() const { return Size & GetLengthFlagBit(); }
87 bool LengthIsSize() const { return GetLengthFlag() != 0; }
88 };
90 // Heap type of the string is encoded in the lower bits.
91 enum HeapType
92 {
93 HT_Global = 0, // Heap is global.
94 HT_Local = 1, // SF::String_loc: Heap is determined based on string's address.
95 HT_Dynamic = 2, // SF::String_temp: Heap is stored as a part of the class.
96 HT_Mask = 3
97 };
99 union {
100 DataDesc* pData;
101 UPInt HeapTypeBits;
102 };
103 typedef union {
104 DataDesc* pData;
105 UPInt HeapTypeBits;
106 } DataDescUnion;
108 inline HeapType GetHeapType() const { return (HeapType) (HeapTypeBits & HT_Mask); }
110 inline DataDesc* GetData() const
111 {
112 DataDescUnion u;
113 u.pData = pData;
114 u.HeapTypeBits = (u.HeapTypeBits & ~(UPInt)HT_Mask);
115 return u.pData;
116 }
118 inline void SetData(DataDesc* pdesc)
119 {
120 HeapType ht = GetHeapType();
121 pData = pdesc;
122 OVR_ASSERT((HeapTypeBits & HT_Mask) == 0);
123 HeapTypeBits |= ht;
124 }
127 DataDesc* AllocData(UPInt size, UPInt lengthIsSize);
128 DataDesc* AllocDataCopy1(UPInt size, UPInt lengthIsSize,
129 const char* pdata, UPInt copySize);
130 DataDesc* AllocDataCopy2(UPInt size, UPInt lengthIsSize,
131 const char* pdata1, UPInt copySize1,
132 const char* pdata2, UPInt copySize2);
134 // Special constructor to avoid data initalization when used in derived class.
135 struct NoConstructor { };
136 String(const NoConstructor&) { }
138 public:
140 // For initializing string with dynamic buffer
141 struct InitStruct
142 {
143 virtual ~InitStruct() { }
144 virtual void InitString(char* pbuffer, UPInt size) const = 0;
145 };
148 // Constructors / Destructors.
149 String();
150 String(const char* data);
151 String(const char* data1, const char* pdata2, const char* pdata3 = 0);
152 String(const char* data, UPInt buflen);
153 String(const String& src);
154 String(const StringBuffer& src);
155 String(const InitStruct& src, UPInt size);
156 explicit String(const wchar_t* data);
158 // Destructor (Captain Obvious guarantees!)
159 ~String()
160 {
161 GetData()->Release();
162 }
164 // Declaration of NullString
165 static DataDesc NullData;
168 // *** General Functions
170 void Clear();
172 // For casting to a pointer to char.
173 operator const char*() const { return GetData()->Data; }
174 // Pointer to raw buffer.
175 const char* ToCStr() const { return GetData()->Data; }
177 // Returns number of bytes
178 UPInt GetSize() const { return GetData()->GetSize() ; }
179 // Tells whether or not the string is empty
180 bool IsEmpty() const { return GetSize() == 0; }
182 // Returns number of characters
183 UPInt GetLength() const;
185 // Returns character at the specified index
186 UInt32 GetCharAt(UPInt index) const;
187 UInt32 GetFirstCharAt(UPInt index, const char** offset) const;
188 UInt32 GetNextChar(const char** offset) const;
190 // Appends a character
191 void AppendChar(UInt32 ch);
193 // Append a string
194 void AppendString(const wchar_t* pstr, SPInt len = -1);
195 void AppendString(const char* putf8str, SPInt utf8StrSz = -1);
197 // Assigned a string with dynamic data (copied through initializer).
198 void AssignString(const InitStruct& src, UPInt size);
199 // Assigns string with known size.
200 void AssignString(const char* putf8str, UPInt size);
202 // Resize the string to the new size
203 // void Resize(UPInt _size);
205 // Removes the character at posAt
206 void Remove(UPInt posAt, SPInt len = 1);
208 // Returns a String that's a substring of this.
209 // -start is the index of the first UTF8 character you want to include.
210 // -end is the index one past the last UTF8 character you want to include.
211 String Substring(UPInt start, UPInt end) const;
213 // Case-conversion
214 String ToUpper() const;
215 String ToLower() const;
217 // Inserts substr at posAt
218 String& Insert (const char* substr, UPInt posAt, SPInt len = -1);
220 // Inserts character at posAt
221 UPInt InsertCharAt(UInt32 c, UPInt posAt);
223 // Inserts substr at posAt, which is an index of a character (not byte).
224 // Of size is specified, it is in bytes.
225 // String& Insert(const UInt32* substr, UPInt posAt, SPInt size = -1);
227 // Get Byte index of the character at position = index
228 UPInt GetByteIndex(UPInt index) const { return (UPInt)UTF8Util::GetByteIndex(index, GetData()->Data); }
230 // Utility: case-insensitive string compare. stricmp() & strnicmp() are not
231 // ANSI or POSIX, do not seem to appear in Linux.
232 static int OVR_STDCALL CompareNoCase(const char* a, const char* b);
233 static int OVR_STDCALL CompareNoCase(const char* a, const char* b, SPInt len);
235 // Hash function, case-insensitive
236 static UPInt OVR_STDCALL BernsteinHashFunctionCIS(const void* pdataIn, UPInt size, UPInt seed = 5381);
238 // Hash function, case-sensitive
239 static UPInt OVR_STDCALL BernsteinHashFunction(const void* pdataIn, UPInt size, UPInt seed = 5381);
242 // ***** File path parsing helper functions.
243 // Implemented in OVR_String_FilePath.cpp.
245 // Absolute paths can star with:
246 // - protocols: 'file://', 'http://'
247 // - windows drive: 'c:\'
248 // - UNC share name: '\\share'
249 // - unix root '/'
250 static bool HasAbsolutePath(const char* path);
251 static bool HasExtension(const char* path);
252 static bool HasProtocol(const char* path);
254 bool HasAbsolutePath() const { return HasAbsolutePath(ToCStr()); }
255 bool HasExtension() const { return HasExtension(ToCStr()); }
256 bool HasProtocol() const { return HasProtocol(ToCStr()); }
258 String GetProtocol() const; // Returns protocol, if any, with trailing '://'.
259 String GetPath() const; // Returns path with trailing '/'.
260 String GetFilename() const; // Returns filename, including extension.
261 String GetExtension() const; // Returns extension with a dot.
263 void StripProtocol(); // Strips front protocol, if any, from the string.
264 void StripExtension(); // Strips off trailing extension.
267 // Operators
268 // Assignment
269 void operator = (const char* str);
270 void operator = (const wchar_t* str);
271 void operator = (const String& src);
272 void operator = (const StringBuffer& src);
274 // Addition
275 void operator += (const String& src);
276 void operator += (const char* psrc) { AppendString(psrc); }
277 void operator += (const wchar_t* psrc) { AppendString(psrc); }
278 void operator += (char ch) { AppendChar(ch); }
279 String operator + (const char* str) const;
280 String operator + (const String& src) const;
282 // Comparison
283 bool operator == (const String& str) const
284 {
285 return (OVR_strcmp(GetData()->Data, str.GetData()->Data)== 0);
286 }
288 bool operator != (const String& str) const
289 {
290 return !operator == (str);
291 }
293 bool operator == (const char* str) const
294 {
295 return OVR_strcmp(GetData()->Data, str) == 0;
296 }
298 bool operator != (const char* str) const
299 {
300 return !operator == (str);
301 }
303 bool operator < (const char* pstr) const
304 {
305 return OVR_strcmp(GetData()->Data, pstr) < 0;
306 }
308 bool operator < (const String& str) const
309 {
310 return *this < str.GetData()->Data;
311 }
313 bool operator > (const char* pstr) const
314 {
315 return OVR_strcmp(GetData()->Data, pstr) > 0;
316 }
318 bool operator > (const String& str) const
319 {
320 return *this > str.GetData()->Data;
321 }
323 int CompareNoCase(const char* pstr) const
324 {
325 return CompareNoCase(GetData()->Data, pstr);
326 }
327 int CompareNoCase(const String& str) const
328 {
329 return CompareNoCase(GetData()->Data, str.ToCStr());
330 }
332 // Accesses raw bytes
333 const char& operator [] (int index) const
334 {
335 OVR_ASSERT(index >= 0 && (UPInt)index < GetSize());
336 return GetData()->Data[index];
337 }
338 const char& operator [] (UPInt index) const
339 {
340 OVR_ASSERT(index < GetSize());
341 return GetData()->Data[index];
342 }
345 // Case insensitive keys are used to look up insensitive string in hash tables
346 // for SWF files with version before SWF 7.
347 struct NoCaseKey
348 {
349 const String* pStr;
350 NoCaseKey(const String &str) : pStr(&str){};
351 };
353 bool operator == (const NoCaseKey& strKey) const
354 {
355 return (CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
356 }
357 bool operator != (const NoCaseKey& strKey) const
358 {
359 return !(CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
360 }
362 // Hash functor used for strings.
363 struct HashFunctor
364 {
365 UPInt operator()(const String& data) const
366 {
367 UPInt size = data.GetSize();
368 return String::BernsteinHashFunction((const char*)data, size);
369 }
370 };
371 // Case-insensitive hash functor used for strings. Supports additional
372 // lookup based on NoCaseKey.
373 struct NoCaseHashFunctor
374 {
375 UPInt operator()(const String& data) const
376 {
377 UPInt size = data.GetSize();
378 return String::BernsteinHashFunctionCIS((const char*)data, size);
379 }
380 UPInt operator()(const NoCaseKey& data) const
381 {
382 UPInt size = data.pStr->GetSize();
383 return String::BernsteinHashFunctionCIS((const char*)data.pStr->ToCStr(), size);
384 }
385 };
387 };
390 //-----------------------------------------------------------------------------------
391 // ***** String Buffer used for Building Strings
393 class StringBuffer
394 {
395 char* pData;
396 UPInt Size;
397 UPInt BufferSize;
398 UPInt GrowSize;
399 mutable bool LengthIsSize;
401 public:
403 // Constructors / Destructor.
404 StringBuffer();
405 explicit StringBuffer(UPInt growSize);
406 StringBuffer(const char* data);
407 StringBuffer(const char* data, UPInt buflen);
408 StringBuffer(const String& src);
409 StringBuffer(const StringBuffer& src);
410 explicit StringBuffer(const wchar_t* data);
411 ~StringBuffer();
414 // Modify grow size used for growing/shrinking the buffer.
415 UPInt GetGrowSize() const { return GrowSize; }
416 void SetGrowSize(UPInt growSize);
419 // *** General Functions
420 // Does not release memory, just sets Size to 0
421 void Clear();
423 // For casting to a pointer to char.
424 operator const char*() const { return (pData) ? pData : ""; }
425 // Pointer to raw buffer.
426 const char* ToCStr() const { return (pData) ? pData : ""; }
428 // Returns number of bytes.
429 UPInt GetSize() const { return Size ; }
430 // Tells whether or not the string is empty.
431 bool IsEmpty() const { return GetSize() == 0; }
433 // Returns number of characters
434 UPInt GetLength() const;
436 // Returns character at the specified index
437 UInt32 GetCharAt(UPInt index) const;
438 UInt32 GetFirstCharAt(UPInt index, const char** offset) const;
439 UInt32 GetNextChar(const char** offset) const;
442 // Resize the string to the new size
443 void Resize(UPInt _size);
444 void Reserve(UPInt _size);
446 // Appends a character
447 void AppendChar(UInt32 ch);
449 // Append a string
450 void AppendString(const wchar_t* pstr, SPInt len = -1);
451 void AppendString(const char* putf8str, SPInt utf8StrSz = -1);
452 void AppendFormat(const char* format, ...);
454 // Assigned a string with dynamic data (copied through initializer).
455 //void AssignString(const InitStruct& src, UPInt size);
457 // Inserts substr at posAt
458 void Insert (const char* substr, UPInt posAt, SPInt len = -1);
459 // Inserts character at posAt
460 UPInt InsertCharAt(UInt32 c, UPInt posAt);
462 // Assignment
463 void operator = (const char* str);
464 void operator = (const wchar_t* str);
465 void operator = (const String& src);
467 // Addition
468 void operator += (const String& src) { AppendString(src.ToCStr(),src.GetSize()); }
469 void operator += (const char* psrc) { AppendString(psrc); }
470 void operator += (const wchar_t* psrc) { AppendString(psrc); }
471 void operator += (char ch) { AppendChar(ch); }
472 //String operator + (const char* str) const ;
473 //String operator + (const String& src) const ;
475 // Accesses raw bytes
476 char& operator [] (int index)
477 {
478 OVR_ASSERT(((UPInt)index) < GetSize());
479 return pData[index];
480 }
481 char& operator [] (UPInt index)
482 {
483 OVR_ASSERT(index < GetSize());
484 return pData[index];
485 }
487 const char& operator [] (int index) const
488 {
489 OVR_ASSERT(((UPInt)index) < GetSize());
490 return pData[index];
491 }
492 const char& operator [] (UPInt index) const
493 {
494 OVR_ASSERT(index < GetSize());
495 return pData[index];
496 }
497 };
500 //
501 // Wrapper for string data. The data must have a guaranteed
502 // lifespan throughout the usage of the wrapper. Not intended for
503 // cached usage. Not thread safe.
504 //
505 class StringDataPtr
506 {
507 public:
508 StringDataPtr() : pStr(NULL), Size(0) {}
509 StringDataPtr(const StringDataPtr& p)
510 : pStr(p.pStr), Size(p.Size) {}
511 StringDataPtr(const char* pstr, UPInt sz)
512 : pStr(pstr), Size(sz) {}
513 StringDataPtr(const char* pstr)
514 : pStr(pstr), Size((pstr != NULL) ? OVR_strlen(pstr) : 0) {}
515 explicit StringDataPtr(const String& str)
516 : pStr(str.ToCStr()), Size(str.GetSize()) {}
517 template <typename T, int N>
518 StringDataPtr(const T (&v)[N])
519 : pStr(v), Size(N) {}
521 public:
522 const char* ToCStr() const { return pStr; }
523 UPInt GetSize() const { return Size; }
524 bool IsEmpty() const { return GetSize() == 0; }
526 // value is a prefix of this string
527 // Character's values are not compared.
528 bool IsPrefix(const StringDataPtr& value) const
529 {
530 return ToCStr() == value.ToCStr() && GetSize() >= value.GetSize();
531 }
532 // value is a suffix of this string
533 // Character's values are not compared.
534 bool IsSuffix(const StringDataPtr& value) const
535 {
536 return ToCStr() <= value.ToCStr() && (End()) == (value.End());
537 }
539 // Find first character.
540 // init_ind - initial index.
541 SPInt FindChar(char c, UPInt init_ind = 0) const
542 {
543 for (UPInt i = init_ind; i < GetSize(); ++i)
544 if (pStr[i] == c)
545 return static_cast<SPInt>(i);
547 return -1;
548 }
550 // Find last character.
551 // init_ind - initial index.
552 SPInt FindLastChar(char c, UPInt init_ind = ~0) const
553 {
554 if (init_ind == (UPInt)~0 || init_ind > GetSize())
555 init_ind = GetSize();
556 else
557 ++init_ind;
559 for (UPInt i = init_ind; i > 0; --i)
560 if (pStr[i - 1] == c)
561 return static_cast<SPInt>(i - 1);
563 return -1;
564 }
566 // Create new object and trim size bytes from the left.
567 StringDataPtr GetTrimLeft(UPInt size) const
568 {
569 // Limit trim size to the size of the string.
570 size = Alg::PMin(GetSize(), size);
572 return StringDataPtr(ToCStr() + size, GetSize() - size);
573 }
574 // Create new object and trim size bytes from the right.
575 StringDataPtr GetTrimRight(UPInt size) const
576 {
577 // Limit trim to the size of the string.
578 size = Alg::PMin(GetSize(), size);
580 return StringDataPtr(ToCStr(), GetSize() - size);
581 }
583 // Create new object, which contains next token.
584 // Useful for parsing.
585 StringDataPtr GetNextToken(char separator = ':') const
586 {
587 UPInt cur_pos = 0;
588 const char* cur_str = ToCStr();
590 for (; cur_pos < GetSize() && cur_str[cur_pos]; ++cur_pos)
591 {
592 if (cur_str[cur_pos] == separator)
593 {
594 break;
595 }
596 }
598 return StringDataPtr(ToCStr(), cur_pos);
599 }
601 // Trim size bytes from the left.
602 StringDataPtr& TrimLeft(UPInt size)
603 {
604 // Limit trim size to the size of the string.
605 size = Alg::PMin(GetSize(), size);
606 pStr += size;
607 Size -= size;
609 return *this;
610 }
611 // Trim size bytes from the right.
612 StringDataPtr& TrimRight(UPInt size)
613 {
614 // Limit trim to the size of the string.
615 size = Alg::PMin(GetSize(), size);
616 Size -= size;
618 return *this;
619 }
621 const char* Begin() const { return ToCStr(); }
622 const char* End() const { return ToCStr() + GetSize(); }
624 // Hash functor used string data pointers
625 struct HashFunctor
626 {
627 UPInt operator()(const StringDataPtr& data) const
628 {
629 return String::BernsteinHashFunction(data.ToCStr(), data.GetSize());
630 }
631 };
633 bool operator== (const StringDataPtr& data) const
634 {
635 return (OVR_strncmp(pStr, data.pStr, data.Size) == 0);
636 }
638 protected:
639 const char* pStr;
640 UPInt Size;
641 };
643 } // OVR
645 #endif