nuclear@0: /************************************************************************************
nuclear@0: 
nuclear@0: PublicHeader:   OVR_Kernel.h
nuclear@0: Filename    :   OVR_UTF8Util.h
nuclear@0: Content     :   UTF8 Unicode character encoding/decoding support
nuclear@0: Created     :   September 19, 2012
nuclear@0: Notes       : 
nuclear@0: 
nuclear@0: Copyright   :   Copyright 2014 Oculus VR, LLC All Rights reserved.
nuclear@0: 
nuclear@0: Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License"); 
nuclear@0: you may not use the Oculus VR Rift SDK except in compliance with the License, 
nuclear@0: which is provided at the time of installation or download, or which 
nuclear@0: otherwise accompanies this software in either electronic or hard copy form.
nuclear@0: 
nuclear@0: You may obtain a copy of the License at
nuclear@0: 
nuclear@0: http://www.oculusvr.com/licenses/LICENSE-3.2 
nuclear@0: 
nuclear@0: Unless required by applicable law or agreed to in writing, the Oculus VR SDK 
nuclear@0: distributed under the License is distributed on an "AS IS" BASIS,
nuclear@0: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
nuclear@0: See the License for the specific language governing permissions and
nuclear@0: limitations under the License.
nuclear@0: 
nuclear@0: ************************************************************************************/
nuclear@0: 
nuclear@0: #ifndef OVR_UTF8Util_h
nuclear@0: #define OVR_UTF8Util_h
nuclear@0: 
nuclear@0: #include "OVR_Types.h"
nuclear@0: 
nuclear@0: namespace OVR { namespace UTF8Util {
nuclear@0: 
nuclear@0: //-----------------------------------------------------------------------------------
nuclear@0: 
nuclear@0: // *** UTF8 string length and indexing.
nuclear@0: 
nuclear@0: // Returns the length of the UTF-8 string putf8str, measured in characters (not bytes).
nuclear@0: // If length (the source size in bytes) is specified, null (0) characters are counted properly; it defaults to -1.
nuclear@0: intptr_t OVR_STDCALL GetLength(const char* putf8str, intptr_t length = -1);
nuclear@0: 
nuclear@0: // Returns the decoded UTF-8 character at character position index in putf8str; valid
nuclear@0: // indices go up to the value returned by GetLength. Out-of-bounds access returns 0.
nuclear@0: uint32_t OVR_STDCALL GetCharAt(intptr_t index, const char* putf8str, intptr_t length = -1);
nuclear@0: 
nuclear@0: // Converts the UTF-8 character index into a byte offset within putf8str.
nuclear@0: // Returns -1 if index is out of bounds.
nuclear@0: intptr_t OVR_STDCALL GetByteIndex(intptr_t index, const char* putf8str, intptr_t length = -1);
nuclear@0: 
nuclear@0: 
nuclear@0: // *** 16-bit Unicode string Encoding/Decoding routines.
nuclear@0: 
nuclear@0: // Returns the number of bytes necessary to encode the string pchar.
nuclear@0: // The terminating null (0) character is not included in the count.
nuclear@0: intptr_t OVR_STDCALL GetEncodeStringSize(const wchar_t* pchar, intptr_t length = -1);
nuclear@0: 
nuclear@0: // Encodes the Unicode (UCS-2 only) string pchar into the buffer pbuff. The buffer must
nuclear@0: // hold at least GetEncodeStringSize() + 1 bytes.
nuclear@0: void     OVR_STDCALL EncodeString(char *pbuff, const wchar_t* pchar, intptr_t length = -1);
nuclear@0: 
nuclear@0: // Decodes the UTF-8 string putf8str into the wchar_t buffer pbuff, which must have
nuclear@0: // GetLength() + 1 characters available. Characters over 0xFFFF are replaced with 0xFFFD.
nuclear@0: // Returns the length of the resulting string (number of characters).
nuclear@0: size_t   OVR_STDCALL DecodeString(wchar_t *pbuff, const char* putf8str, intptr_t bytesLen = -1);
nuclear@0: 
nuclear@0: 
nuclear@0: // *** Individual character Encoding/Decoding.
nuclear@0: 
nuclear@0: // Determines the number of bytes necessary to encode the UCS character ucsCharacter.
nuclear@0: int      OVR_STDCALL GetEncodeCharSize(uint32_t ucsCharacter);
nuclear@0: 
nuclear@0: // Encodes the UCS character ucsCharacter into the UTF-8 buffer pbuffer.
nuclear@0: // Writes the data starting at pbuffer[*poffset], and
nuclear@0: // increments *poffset by the number of bytes written.
nuclear@0: // May write up to 6 bytes, so make sure the buffer has room for them.
nuclear@0: void     OVR_STDCALL EncodeChar(char* pbuffer, intptr_t* poffset, uint32_t ucsCharacter);
nuclear@0: 
nuclear@0: // Returns the next Unicode character decoded from the UTF-8 buffer *putf8Buffer.
nuclear@0: // Invalid UTF-8 sequences produce a U+FFFD character as output.
nuclear@0: // Advances *putf8Buffer past the character returned. The pointer advance
nuclear@0: // occurs even if the terminating 0 character is hit, since that allows
nuclear@0: // strings with '\0' characters in the middle to be supported.
nuclear@0: uint32_t OVR_STDCALL DecodeNextChar_Advance0(const char** putf8Buffer);
nuclear@0: 
nuclear@0: // Safer variant of DecodeNextChar_Advance0: decodes the next character from
nuclear@0: // *putf8Buffer, but moves the pointer back by one when the decoded character
nuclear@0: // is 0, so the pointer is not left past a null character.
nuclear@0: inline uint32_t DecodeNextChar(const char** putf8Buffer)
nuclear@0: {
nuclear@0:     const uint32_t decoded = DecodeNextChar_Advance0(putf8Buffer);
nuclear@0:     if (decoded != 0)
nuclear@0:         return decoded;
nuclear@0: 
nuclear@0:     // Step back over the advance made by DecodeNextChar_Advance0 for the 0 character.
nuclear@0:     --(*putf8Buffer);
nuclear@0:     return 0;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: }} // OVR::UTF8Util
nuclear@0: 
nuclear@0: #endif