nuclear@0: /************************************************************************************ nuclear@0: nuclear@0: PublicHeader: OVR_Kernel.h nuclear@0: Filename : OVR_UTF8Util.h nuclear@0: Content : UTF8 Unicode character encoding/decoding support nuclear@0: Created : September 19, 2012 nuclear@0: Notes : nuclear@0: nuclear@0: Copyright : Copyright 2014 Oculus VR, LLC All Rights reserved. nuclear@0: nuclear@0: Licensed under the Oculus VR Rift SDK License Version 3.2 (the "License"); nuclear@0: you may not use the Oculus VR Rift SDK except in compliance with the License, nuclear@0: which is provided at the time of installation or download, or which nuclear@0: otherwise accompanies this software in either electronic or hard copy form. nuclear@0: nuclear@0: You may obtain a copy of the License at nuclear@0: nuclear@0: http://www.oculusvr.com/licenses/LICENSE-3.2 nuclear@0: nuclear@0: Unless required by applicable law or agreed to in writing, the Oculus VR SDK nuclear@0: distributed under the License is distributed on an "AS IS" BASIS, nuclear@0: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. nuclear@0: See the License for the specific language governing permissions and nuclear@0: limitations under the License. nuclear@0: nuclear@0: ************************************************************************************/ nuclear@0: nuclear@0: #ifndef OVR_UTF8Util_h nuclear@0: #define OVR_UTF8Util_h nuclear@0: nuclear@0: #include "OVR_Types.h" nuclear@0: nuclear@0: namespace OVR { namespace UTF8Util { nuclear@0: nuclear@0: //----------------------------------------------------------------------------------- nuclear@0: nuclear@0: // *** UTF8 string length and indexing. nuclear@0: nuclear@0: // Determines the length of UTF8 string in characters. nuclear@0: // If source length is specified (in bytes), null 0 character is counted properly. nuclear@0: intptr_t OVR_STDCALL GetLength(const char* putf8str, intptr_t length = -1); nuclear@0: nuclear@0: // Gets a decoded UTF8 character at index; you can access up to the index returned nuclear@0: // by GetLength. 0 will be returned for out of bounds access. nuclear@0: uint32_t OVR_STDCALL GetCharAt(intptr_t index, const char* putf8str, intptr_t length = -1); nuclear@0: nuclear@0: // Converts UTF8 character index into byte offset. nuclear@0: // -1 is returned if index was out of bounds. nuclear@0: intptr_t OVR_STDCALL GetByteIndex(intptr_t index, const char* putf8str, intptr_t length = -1); nuclear@0: nuclear@0: nuclear@0: // *** 16-bit Unicode string Encoding/Decoding routines. nuclear@0: nuclear@0: // Determines the number of bytes necessary to encode a string. nuclear@0: // Does not count the terminating 0 (null) character. nuclear@0: intptr_t OVR_STDCALL GetEncodeStringSize(const wchar_t* pchar, intptr_t length = -1); nuclear@0: nuclear@0: // Encodes a unicode (UCS-2 only) string into a buffer. The size of buffer must be at nuclear@0: // least GetEncodeStringSize() + 1. nuclear@0: void OVR_STDCALL EncodeString(char *pbuff, const wchar_t* pchar, intptr_t length = -1); nuclear@0: nuclear@0: // Decode UTF8 into a wchar_t buffer. Must have GetLength()+1 characters available. nuclear@0: // Characters over 0xFFFF are replaced with 0xFFFD. nuclear@0: // Returns the length of resulting string (number of characters) nuclear@0: size_t OVR_STDCALL DecodeString(wchar_t *pbuff, const char* putf8str, intptr_t bytesLen = -1); nuclear@0: nuclear@0: nuclear@0: // *** Individual character Encoding/Decoding. nuclear@0: nuclear@0: // Determined the number of bytes necessary to encode a UCS character. nuclear@0: int OVR_STDCALL GetEncodeCharSize(uint32_t ucsCharacter); nuclear@0: nuclear@0: // Encodes the given UCS character into the given UTF-8 buffer. nuclear@0: // Writes the data starting at buffer[offset], and nuclear@0: // increments offset by the number of bytes written. nuclear@0: // May write up to 6 bytes, so make sure there's room in the buffer nuclear@0: void OVR_STDCALL EncodeChar(char* pbuffer, intptr_t* poffset, uint32_t ucsCharacter); nuclear@0: nuclear@0: // Return the next Unicode character in the UTF-8 encoded buffer. nuclear@0: // Invalid UTF-8 sequences produce a U+FFFD character as output. nuclear@0: // Advances *utf8_buffer past the character returned. Pointer advance nuclear@0: // occurs even if the terminating 0 character is hit, since that allows nuclear@0: // strings with middle '\0' characters to be supported. nuclear@0: uint32_t OVR_STDCALL DecodeNextChar_Advance0(const char** putf8Buffer); nuclear@0: nuclear@0: // Safer version of DecodeNextChar, which doesn't advance pointer if nuclear@0: // null character is hit. nuclear@0: inline uint32_t DecodeNextChar(const char** putf8Buffer) nuclear@0: { nuclear@0: uint32_t ch = DecodeNextChar_Advance0(putf8Buffer); nuclear@0: if (ch == 0) nuclear@0: (*putf8Buffer)--; nuclear@0: return ch; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: }} // OVR::UTF8Util nuclear@0: nuclear@0: #endif