rev |
line source |
nuclear@1
|
1 /************************************************************************************
|
nuclear@1
|
2
|
nuclear@1
|
3 PublicHeader: OVR.h
|
nuclear@1
|
4 Filename : OVR_UTF8Util.h
|
nuclear@1
|
5 Content : UTF8 Unicode character encoding/decoding support
|
nuclear@1
|
6 Created : September 19, 2012
|
nuclear@1
|
7 Notes :
|
nuclear@1
|
8
|
nuclear@1
|
9 Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved.
|
nuclear@1
|
10
|
nuclear@1
|
11 Use of this software is subject to the terms of the Oculus license
|
nuclear@1
|
12 agreement provided at the time of installation or download, or which
|
nuclear@1
|
13 otherwise accompanies this software in either electronic or hard copy form.
|
nuclear@1
|
14
|
nuclear@1
|
15 ************************************************************************************/
|
nuclear@1
|
16
|
nuclear@1
|
17 #ifndef OVR_UTF8Util_h
|
nuclear@1
|
18 #define OVR_UTF8Util_h
|
nuclear@1
|
19
|
nuclear@1
|
20 #include "OVR_Types.h"
|
nuclear@1
|
21
|
nuclear@1
|
22 namespace OVR { namespace UTF8Util {
|
nuclear@1
|
23
|
nuclear@1
|
24 //-----------------------------------------------------------------------------------
|
nuclear@1
|
25
|
nuclear@1
|
26 // *** UTF8 string length and indexing.
|
nuclear@1
|
27
|
nuclear@1
|
28 // Determines the length of UTF8 string in characters.
|
nuclear@1
|
29 // If source length is specified (in bytes), the null (0) character is counted properly.
|
nuclear@1
|
30 SPInt OVR_STDCALL GetLength(const char* putf8str, SPInt length = -1);
|
nuclear@1
|
31
|
nuclear@1
|
32 // Gets a decoded UTF8 character at index; you can access up to the index returned
|
nuclear@1
|
33 // by GetLength. 0 will be returned for out of bounds access.
|
nuclear@1
|
34 UInt32 OVR_STDCALL GetCharAt(SPInt index, const char* putf8str, SPInt length = -1);
|
nuclear@1
|
35
|
nuclear@1
|
36 // Converts UTF8 character index into byte offset.
|
nuclear@1
|
37 // -1 is returned if index was out of bounds.
|
nuclear@1
|
38 SPInt OVR_STDCALL GetByteIndex(SPInt index, const char* putf8str, SPInt length = -1);
|
nuclear@1
|
39
|
nuclear@1
|
40
|
nuclear@1
|
41 // *** 16-bit Unicode string Encoding/Decoding routines.
|
nuclear@1
|
42
|
nuclear@1
|
43 // Determines the number of bytes necessary to encode a string.
|
nuclear@1
|
44 // Does not count the terminating 0 (null) character.
|
nuclear@1
|
45 SPInt OVR_STDCALL GetEncodeStringSize(const wchar_t* pchar, SPInt length = -1);
|
nuclear@1
|
46
|
nuclear@1
|
47 // Encodes a Unicode (UCS-2 only) string into a buffer. The size of the buffer must be at
|
nuclear@1
|
48 // least GetEncodeStringSize() + 1.
|
nuclear@1
|
49 void OVR_STDCALL EncodeString(char *pbuff, const wchar_t* pchar, SPInt length = -1);
|
nuclear@1
|
50
|
nuclear@1
|
51 // Decode UTF8 into a wchar_t buffer. Must have GetLength()+1 characters available.
|
nuclear@1
|
52 // Characters over 0xFFFF are replaced with 0xFFFD.
|
nuclear@1
|
53 // Returns the length of the resulting string (number of characters).
|
nuclear@1
|
54 UPInt OVR_STDCALL DecodeString(wchar_t *pbuff, const char* putf8str, SPInt bytesLen = -1);
|
nuclear@1
|
55
|
nuclear@1
|
56
|
nuclear@1
|
57 // *** Individual character Encoding/Decoding.
|
nuclear@1
|
58
|
nuclear@1
|
59 // Determines the number of bytes necessary to encode a UCS character.
|
nuclear@1
|
60 int OVR_STDCALL GetEncodeCharSize(UInt32 ucsCharacter);
|
nuclear@1
|
61
|
nuclear@1
|
62 // Encodes the given UCS character into the given UTF-8 buffer.
|
nuclear@1
|
63 // Writes the data starting at buffer[offset], and
|
nuclear@1
|
64 // increments offset by the number of bytes written.
|
nuclear@1
|
65 // May write up to 6 bytes, so make sure there's room in the buffer.
|
nuclear@1
|
66 void OVR_STDCALL EncodeChar(char* pbuffer, SPInt* poffset, UInt32 ucsCharacter);
|
nuclear@1
|
67
|
nuclear@1
|
68 // Return the next Unicode character in the UTF-8 encoded buffer.
|
nuclear@1
|
69 // Invalid UTF-8 sequences produce a U+FFFD character as output.
|
nuclear@1
|
70 // Advances *putf8Buffer past the character returned. Pointer advance
|
nuclear@1
|
71 // occurs even if the terminating 0 character is hit, since that allows
|
nuclear@1
|
72 // strings with middle '\0' characters to be supported.
|
nuclear@1
|
73 UInt32 OVR_STDCALL DecodeNextChar_Advance0(const char** putf8Buffer);
|
nuclear@1
|
74
|
nuclear@1
|
75 // Safer version of DecodeNextChar, which doesn't advance pointer if
|
nuclear@1
|
76 // null character is hit.
|
nuclear@1
|
77 inline UInt32 DecodeNextChar(const char** putf8Buffer)
|
nuclear@1
|
78 {
|
nuclear@1
|
79 UInt32 ch = DecodeNextChar_Advance0(putf8Buffer);
|
nuclear@1
|
80 if (ch == 0)
|
nuclear@1
|
81 (*putf8Buffer)--;
|
nuclear@1
|
82 return ch;
|
nuclear@1
|
83 }
|
nuclear@1
|
84
|
nuclear@1
|
85
|
nuclear@1
|
86 }} // OVR::UTF8Util
|
nuclear@1
|
87
|
nuclear@1
|
88 #endif
|