vrshoot: libs/assimp/ConvertUTF/ConvertUTF.h annotate

vrshoot

annotate libs/assimp/ConvertUTF/ConvertUTF.h @ 0:b2f14e535253

initial commit

author	John Tsiombikas <nuclear@member.fsf.org>
date	Sat, 01 Feb 2014 19:58:19 +0200
parents
children

rev	line source
nuclear@0	1 /*
nuclear@0	2 * Copyright 2001-2004 Unicode, Inc.
nuclear@0	3 *
nuclear@0	4 * Disclaimer
nuclear@0	5 *
nuclear@0	6 * This source code is provided as is by Unicode, Inc. No claims are
nuclear@0	7 * made as to fitness for any particular purpose. No warranties of any
nuclear@0	8 * kind are expressed or implied. The recipient agrees to determine
nuclear@0	9 * applicability of information provided. If this file has been
nuclear@0	10 * purchased on magnetic or optical media from Unicode, Inc., the
nuclear@0	11 * sole remedy for any claim will be exchange of defective media
nuclear@0	12 * within 90 days of receipt.
nuclear@0	13 *
nuclear@0	14 * Limitations on Rights to Redistribute This Code
nuclear@0	15 *
nuclear@0	16 * Unicode, Inc. hereby grants the right to freely use the information
nuclear@0	17 * supplied in this file in the creation of products supporting the
nuclear@0	18 * Unicode Standard, and to make copies of this file in any form
nuclear@0	19 * for internal or external distribution as long as this notice
nuclear@0	20 * remains attached.
nuclear@0	21 */
nuclear@0	22 #ifndef CONVERTUTF_H
nuclear@0	23 #define CONVERTUTF_H
nuclear@0	24 /* ---------------------------------------------------------------------
nuclear@0	25
nuclear@0	26 Conversions between UTF-32, UTF-16, and UTF-8. Header file.
nuclear@0	27
nuclear@0	28 Several functions are included here, forming a complete set of
nuclear@0	29 conversions between the three formats. UTF-7 is not included
nuclear@0	30 here, but is handled in a separate source file.
nuclear@0	31
nuclear@0	32 Each of these routines takes pointers to input buffers and output
nuclear@0	33 buffers. The input buffers are const.
nuclear@0	34
nuclear@0	35 Each routine converts the text between *sourceStart and sourceEnd,
nuclear@0	36 putting the result into the buffer between *targetStart and
nuclear@0	37 targetEnd. Note: the end pointers are after the last item: e.g.
nuclear@0	38 *(sourceEnd - 1) is the last item.
nuclear@0	39
nuclear@0	40 The return result indicates whether the conversion was successful,
nuclear@0	41 and if not, whether the problem was in the source or target buffers.
nuclear@0	42 (Only the first encountered problem is indicated.)
nuclear@0	43
nuclear@0	44 After the conversion, sourceStart and targetStart are both
nuclear@0	45 updated to point to the end of last text successfully converted in
nuclear@0	46 the respective buffers.
nuclear@0	47
nuclear@0	48 Input parameters:
nuclear@0	49 sourceStart - pointer to a pointer to the source buffer.
nuclear@0	50 The contents of this are modified on return so that
nuclear@0	51 it points at the next thing to be converted.
nuclear@0	52 targetStart - similarly, pointer to pointer to the target buffer.
nuclear@0	53 sourceEnd, targetEnd - respectively pointers to the ends of the
nuclear@0	54 two buffers, for overflow checking only.
nuclear@0	55
nuclear@0	56 These conversion functions take a ConversionFlags argument. When this
nuclear@0	57 flag is set to strict, both irregular sequences and isolated surrogates
nuclear@0	58 will cause an error. When the flag is set to lenient, both irregular
nuclear@0	59 sequences and isolated surrogates are converted.
nuclear@0	60
nuclear@0	61 Whether the flag is strict or lenient, all illegal sequences will cause
nuclear@0	62 an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
nuclear@0	63 or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
nuclear@0	64 must check for illegal sequences.
nuclear@0	65
nuclear@0	66 When the flag is set to lenient, characters over 0x10FFFF are converted
nuclear@0	67 to the replacement character; otherwise (when the flag is set to strict)
nuclear@0	68 they constitute an error.
nuclear@0	69
nuclear@0	70 Output parameters:
nuclear@0	71 The value "sourceIllegal" is returned from some routines if the input
nuclear@0	72 sequence is malformed. When "sourceIllegal" is returned, the source
nuclear@0	73 value will point to the illegal value that caused the problem. E.g.,
nuclear@0	74 in UTF-8 when a sequence is malformed, it points to the start of the
nuclear@0	75 malformed sequence.
nuclear@0	76
nuclear@0	77 Author: Mark E. Davis, 1994.
nuclear@0	78 Rev History: Rick McGowan, fixes & updates May 2001.
nuclear@0	79 Fixes & updates, Sept 2001.
nuclear@0	80
nuclear@0	81 ------------------------------------------------------------------------ */
nuclear@0	82
nuclear@0	83 /* ---------------------------------------------------------------------
nuclear@0	84 The following 4 definitions are compiler-specific.
nuclear@0	85 The C standard does not guarantee that wchar_t has at least
nuclear@0	86 16 bits, so wchar_t is no less portable than unsigned short!
nuclear@0	87 All should be unsigned values to avoid sign extension during
nuclear@0	88 bit mask & shift operations.
nuclear@0	89 ------------------------------------------------------------------------ */
nuclear@0	90
nuclear@0	91 typedef unsigned long UTF32; /* one UTF-32 code unit; at least 32 bits. NOTE(review): unsigned long is 64 bits wide on LP64 platforms -- confirm no caller aliases a buffer of 32-bit units as UTF32* */
nuclear@0	92 typedef unsigned short UTF16; /* one UTF-16 code unit; at least 16 bits */
nuclear@0	93 typedef unsigned char UTF8; /* one UTF-8 code unit (a byte); typically 8 bits */
nuclear@0	94 typedef unsigned char Boolean; /* truth value: 0 or 1 */
nuclear@0	95
nuclear@0	96 /* Some fundamental constants, all of type UTF32. Each expansion is fully parenthesized so the cast stays bound to its literal wherever the macro is used (e.g. as a sizeof operand). */
nuclear@0	97 #define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD) /* substituted for unconvertible input in lenient mode */
nuclear@0	98 #define UNI_MAX_BMP ((UTF32)0x0000FFFF) /* largest value that fits in a single 16-bit unit */
nuclear@0	99 #define UNI_MAX_UTF16 ((UTF32)0x0010FFFF) /* upper bound named for UTF-16; same value as UNI_MAX_LEGAL_UTF32 */
nuclear@0	100 #define UNI_MAX_UTF32 ((UTF32)0x7FFFFFFF) /* upper bound named for UTF-32 (31-bit range) */
nuclear@0	101 #define UNI_MAX_LEGAL_UTF32 ((UTF32)0x0010FFFF) /* values above this are illegal in UTF-32 */
nuclear@0	102 /* Status returned by each Convert* routine; only the first problem encountered is reported. */
nuclear@0	103 typedef enum {
nuclear@0	104 conversionOK, /* conversion successful */
nuclear@0	105 sourceExhausted, /* source ended in the middle of a character */
nuclear@0	106 targetExhausted, /* insufficient room in the target buffer for the conversion */
nuclear@0	107 sourceIllegal /* source sequence is illegal/malformed */
nuclear@0	108 } ConversionResult;
nuclear@0	109 /* Selects how the Convert* routines treat irregular sequences and isolated surrogates. */
nuclear@0	110 typedef enum {
nuclear@0	111 strictConversion = 0, /* irregular sequences and isolated surrogates cause an error */
nuclear@0	112 lenientConversion /* irregular sequences and isolated surrogates are converted */
nuclear@0	113 } ConversionFlags;
nuclear@0	114
nuclear@0	115 /* This is for C++ and does no harm in C */
nuclear@0	116 #ifdef __cplusplus
nuclear@0	117 extern "C" {
nuclear@0	118 #endif
nuclear@0	119 /* UTF-8 -> UTF-16: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	120 ConversionResult ConvertUTF8toUTF16 (
nuclear@0	121 const UTF8** sourceStart, const UTF8* sourceEnd,
nuclear@0	122 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
nuclear@0	123 /* UTF-16 -> UTF-8: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	124 ConversionResult ConvertUTF16toUTF8 (
nuclear@0	125 const UTF16** sourceStart, const UTF16* sourceEnd,
nuclear@0	126 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
nuclear@0	127 /* UTF-8 -> UTF-32: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	128 ConversionResult ConvertUTF8toUTF32 (
nuclear@0	129 const UTF8** sourceStart, const UTF8* sourceEnd,
nuclear@0	130 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
nuclear@0	131 /* UTF-32 -> UTF-8: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	132 ConversionResult ConvertUTF32toUTF8 (
nuclear@0	133 const UTF32** sourceStart, const UTF32* sourceEnd,
nuclear@0	134 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
nuclear@0	135 /* UTF-16 -> UTF-32: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	136 ConversionResult ConvertUTF16toUTF32 (
nuclear@0	137 const UTF16** sourceStart, const UTF16* sourceEnd,
nuclear@0	138 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
nuclear@0	139 /* UTF-32 -> UTF-16: converts [*sourceStart, sourceEnd) into [*targetStart, targetEnd); both start pointers are advanced past the text successfully converted. */
nuclear@0	140 ConversionResult ConvertUTF32toUTF16 (
nuclear@0	141 const UTF32** sourceStart, const UTF32* sourceEnd,
nuclear@0	142 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
nuclear@0	143 /* Declared here, defined outside this header. Takes a byte range by pointer: source is the first byte, sourceEnd is one past the last. Presumably reports (nonzero/zero) whether that range is a legal UTF-8 sequence -- inferred from the name; confirm against the definition. */
nuclear@0	144 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
nuclear@0	145
nuclear@0	146 #ifdef __cplusplus
nuclear@0	147 }
nuclear@0	148 #endif
nuclear@0	149
nuclear@0	150 /* --------------------------------------------------------------------- */
nuclear@0	151 #endif /* CONVERTUTF_H */