vrshoot

annotate libs/assimp/ConvertUTF/ConvertUTF.h @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
rev   line source
nuclear@0 1 /*
nuclear@0 2 * Copyright 2001-2004 Unicode, Inc.
nuclear@0 3 *
nuclear@0 4 * Disclaimer
nuclear@0 5 *
nuclear@0 6 * This source code is provided as is by Unicode, Inc. No claims are
nuclear@0 7 * made as to fitness for any particular purpose. No warranties of any
nuclear@0 8 * kind are expressed or implied. The recipient agrees to determine
nuclear@0 9 * applicability of information provided. If this file has been
nuclear@0 10 * purchased on magnetic or optical media from Unicode, Inc., the
nuclear@0 11 * sole remedy for any claim will be exchange of defective media
nuclear@0 12 * within 90 days of receipt.
nuclear@0 13 *
nuclear@0 14 * Limitations on Rights to Redistribute This Code
nuclear@0 15 *
nuclear@0 16 * Unicode, Inc. hereby grants the right to freely use the information
nuclear@0 17 * supplied in this file in the creation of products supporting the
nuclear@0 18 * Unicode Standard, and to make copies of this file in any form
nuclear@0 19 * for internal or external distribution as long as this notice
nuclear@0 20 * remains attached.
nuclear@0 21 */
nuclear@0 22 #ifndef CONVERTUTF_H
nuclear@0 23 #define CONVERTUTF_H
nuclear@0 24 /* ---------------------------------------------------------------------
nuclear@0 25
nuclear@0 26 Conversions between UTF-32, UTF-16, and UTF-8. Header file.
nuclear@0 27
nuclear@0 28 Several functions are included here, forming a complete set of
nuclear@0 29 conversions between the three formats. UTF-7 is not included
nuclear@0 30 here, but is handled in a separate source file.
nuclear@0 31
nuclear@0 32 Each of these routines takes pointers to input buffers and output
nuclear@0 33 buffers. The input buffers are const.
nuclear@0 34
nuclear@0 35 Each routine converts the text between *sourceStart and sourceEnd,
nuclear@0 36 putting the result into the buffer between *targetStart and
nuclear@0 37 targetEnd. Note: the end pointers are *after* the last item: e.g.
nuclear@0 38 *(sourceEnd - 1) is the last item.
nuclear@0 39
nuclear@0 40 The return result indicates whether the conversion was successful,
nuclear@0 41 and if not, whether the problem was in the source or target buffers.
nuclear@0 42 (Only the first encountered problem is indicated.)
nuclear@0 43
nuclear@0 44 After the conversion, *sourceStart and *targetStart are both
nuclear@0 45 updated to point to the end of the last text successfully converted in
nuclear@0 46 the respective buffers.
nuclear@0 47
nuclear@0 48 Input parameters:
nuclear@0 49 sourceStart - pointer to a pointer to the source buffer.
nuclear@0 50 The contents of this are modified on return so that
nuclear@0 51 it points at the next thing to be converted.
nuclear@0 52 targetStart - similarly, pointer to pointer to the target buffer.
nuclear@0 53 sourceEnd, targetEnd - respectively pointers to the ends of the
nuclear@0 54 two buffers, for overflow checking only.
nuclear@0 55
nuclear@0 56 These conversion functions take a ConversionFlags argument. When this
nuclear@0 57 flag is set to strict, both irregular sequences and isolated surrogates
nuclear@0 58 will cause an error. When the flag is set to lenient, both irregular
nuclear@0 59 sequences and isolated surrogates are converted.
nuclear@0 60
nuclear@0 61 Whether the flag is strict or lenient, all illegal sequences will cause
nuclear@0 62 an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
nuclear@0 63 or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
nuclear@0 64 must check for illegal sequences.
nuclear@0 65
nuclear@0 66 When the flag is set to lenient, characters over 0x10FFFF are converted
nuclear@0 67 to the replacement character; otherwise (when the flag is set to strict)
nuclear@0 68 they constitute an error.
nuclear@0 69
nuclear@0 70 Output parameters:
nuclear@0 71 The value "sourceIllegal" is returned from some routines if the input
nuclear@0 72 sequence is malformed. When "sourceIllegal" is returned, the source
nuclear@0 73 value will point to the illegal value that caused the problem. E.g.,
nuclear@0 74 in UTF-8 when a sequence is malformed, it points to the start of the
nuclear@0 75 malformed sequence.
nuclear@0 76
nuclear@0 77 Author: Mark E. Davis, 1994.
nuclear@0 78 Rev History: Rick McGowan, fixes & updates May 2001.
nuclear@0 79 Fixes & updates, Sept 2001.
nuclear@0 80
nuclear@0 81 ------------------------------------------------------------------------ */
nuclear@0 82
nuclear@0 83 /* ---------------------------------------------------------------------
nuclear@0 84 The following 4 definitions are compiler-specific.
nuclear@0 85 The C standard does not guarantee that wchar_t has at least
nuclear@0 86 16 bits, so wchar_t is no less portable than unsigned short!
nuclear@0 87 All should be unsigned values to avoid sign extension during
nuclear@0 88 bit mask & shift operations.
nuclear@0 89 ------------------------------------------------------------------------ */
nuclear@0 90
nuclear@0 91 typedef unsigned long UTF32; /* one UTF-32 code unit; unsigned long is at least 32 bits but may be wider (e.g. 64 bits on LP64 targets) -- NOTE(review): do not assume sizeof(UTF32) == 4 */
nuclear@0 92 typedef unsigned short UTF16; /* one UTF-16 code unit; at least 16 bits */
nuclear@0 93 typedef unsigned char UTF8; /* one UTF-8 code unit (a byte); typically 8 bits */
nuclear@0 94 typedef unsigned char Boolean; /* truth value, 0 or 1; return type of isLegalUTF8Sequence */
nuclear@0 95
nuclear@0 96 /* Some fundamental constants; every value is cast to UTF32 */
nuclear@0 97 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD /* U+FFFD; presumably the replacement character substituted by lenient conversion -- confirm in the implementation */
nuclear@0 98 #define UNI_MAX_BMP (UTF32)0x0000FFFF /* 0xFFFF, the largest value that fits in 16 bits; "BMP" presumably means Basic Multilingual Plane */
nuclear@0 99 #define UNI_MAX_UTF16 (UTF32)0x0010FFFF /* 0x10FFFF; presumably the highest code point representable in UTF-16 */
nuclear@0 100 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF /* 0x7FFFFFFF, the largest 31-bit value; NOTE(review): larger than UNI_MAX_LEGAL_UTF32, usage not visible in this header */
nuclear@0 101 #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF /* 0x10FFFF; UTF-32 values above this are described as illegal in the header comment at the top of this file */
nuclear@0 102
nuclear@0 103 typedef enum { /* status code returned by each Convert* routine; only the first problem encountered is reported */
nuclear@0 104 conversionOK, /* conversion successful (first enumerator, value 0) */
nuclear@0 105 sourceExhausted, /* source buffer ended in the middle of a character */
nuclear@0 106 targetExhausted, /* insufficient room in the target buffer for the conversion */
nuclear@0 107 sourceIllegal /* source sequence is illegal/malformed; the source pointer is left at the offending value */
nuclear@0 108 } ConversionResult;
nuclear@0 109
nuclear@0 110 typedef enum { /* selects how the Convert* routines treat irregular sequences and isolated surrogates */
nuclear@0 111 strictConversion = 0, /* irregular sequences and isolated surrogates cause an error */
nuclear@0 112 lenientConversion /* irregular sequences and isolated surrogates are converted instead of rejected */
nuclear@0 113 } ConversionFlags;
nuclear@0 114
nuclear@0 115 /* This is for C++ and does no harm in C */
nuclear@0 116 #ifdef __cplusplus
nuclear@0 117 extern "C" {
nuclear@0 118 #endif
nuclear@0 119
nuclear@0 120 ConversionResult ConvertUTF8toUTF16 ( /* converts UTF-8 input to UTF-16 output; returns a ConversionResult status */
nuclear@0 121 const UTF8** sourceStart, const UTF8* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 122 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 123
nuclear@0 124 ConversionResult ConvertUTF16toUTF8 ( /* converts UTF-16 input to UTF-8 output; returns a ConversionResult status */
nuclear@0 125 const UTF16** sourceStart, const UTF16* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 126 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 127
nuclear@0 128 ConversionResult ConvertUTF8toUTF32 ( /* converts UTF-8 input to UTF-32 output; returns a ConversionResult status */
nuclear@0 129 const UTF8** sourceStart, const UTF8* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 130 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 131
nuclear@0 132 ConversionResult ConvertUTF32toUTF8 ( /* converts UTF-32 input to UTF-8 output; returns a ConversionResult status */
nuclear@0 133 const UTF32** sourceStart, const UTF32* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 134 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 135
nuclear@0 136 ConversionResult ConvertUTF16toUTF32 ( /* converts UTF-16 input to UTF-32 output; returns a ConversionResult status */
nuclear@0 137 const UTF16** sourceStart, const UTF16* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 138 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 139
nuclear@0 140 ConversionResult ConvertUTF32toUTF16 ( /* converts UTF-32 input to UTF-16 output; returns a ConversionResult status */
nuclear@0 141 const UTF32** sourceStart, const UTF32* sourceEnd, /* input is [*sourceStart, sourceEnd); *sourceStart is updated to the next item to convert */
nuclear@0 142 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); /* output is [*targetStart, targetEnd); *targetStart is updated past the converted text; flags selects strict or lenient handling */
nuclear@0 143
nuclear@0 144 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); /* presumably returns nonzero when the bytes starting at source (bounded by sourceEnd) form a legal UTF-8 sequence -- not described in this header; confirm against the implementation */
nuclear@0 145
nuclear@0 146 #ifdef __cplusplus
nuclear@0 147 }
nuclear@0 148 #endif
nuclear@0 149
nuclear@0 150 /* --------------------------------------------------------------------- */
nuclear@0 151 #endif // CONVERTUTF_H