oculus1

annotate libovr/Src/Kernel/OVR_UTF8Util.cpp @ 1:e2f9e4603129

added LibOVR and started a simple vr wrapper.
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 14 Sep 2013 16:14:59 +0300
parents
children b069a5c27388
rev   line source
nuclear@1 1 /**************************************************************************
nuclear@1 2
nuclear@1 3 Filename : OVR_UTF8Util.cpp
nuclear@1 4 Content : UTF8 Unicode character encoding/decoding support
nuclear@1 5 Created : September 19, 2012
nuclear@1 6 Notes :
nuclear@1 7 Notes : Much useful info at "UTF-8 and Unicode FAQ"
nuclear@1 8 http://www.cl.cam.ac.uk/~mgk25/unicode.html
nuclear@1 9
nuclear@1 10 Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved.
nuclear@1 11
nuclear@1 12 Use of this software is subject to the terms of the Oculus license
nuclear@1 13 agreement provided at the time of installation or download, or which
nuclear@1 14 otherwise accompanies this software in either electronic or hard copy form.
nuclear@1 15
nuclear@1 16 ************************************************************************************/
nuclear@1 17
nuclear@1 18 #include "OVR_UTF8Util.h"
nuclear@1 19
nuclear@1 20 namespace OVR { namespace UTF8Util {
nuclear@1 21
nuclear@1 22 SPInt OVR_STDCALL GetLength(const char* buf, SPInt buflen)
nuclear@1 23 {
nuclear@1 24 const char* p = buf;
nuclear@1 25 SPInt length = 0;
nuclear@1 26
nuclear@1 27 if (buflen != -1)
nuclear@1 28 {
nuclear@1 29 while (p - buf < buflen)
nuclear@1 30 {
nuclear@1 31 // We should be able to have ASStrings with 0 in the middle.
nuclear@1 32 UTF8Util::DecodeNextChar_Advance0(&p);
nuclear@1 33 length++;
nuclear@1 34 }
nuclear@1 35 }
nuclear@1 36 else
nuclear@1 37 {
nuclear@1 38 while (UTF8Util::DecodeNextChar_Advance0(&p))
nuclear@1 39 length++;
nuclear@1 40 }
nuclear@1 41
nuclear@1 42 return length;
nuclear@1 43 }
nuclear@1 44
nuclear@1 45 UInt32 OVR_STDCALL GetCharAt(SPInt index, const char* putf8str, SPInt length)
nuclear@1 46 {
nuclear@1 47 const char* buf = putf8str;
nuclear@1 48 UInt32 c = 0;
nuclear@1 49
nuclear@1 50 if (length != -1)
nuclear@1 51 {
nuclear@1 52 while (buf - putf8str < length)
nuclear@1 53 {
nuclear@1 54 c = UTF8Util::DecodeNextChar_Advance0(&buf);
nuclear@1 55 if (index == 0)
nuclear@1 56 return c;
nuclear@1 57 index--;
nuclear@1 58 }
nuclear@1 59
nuclear@1 60 return c;
nuclear@1 61 }
nuclear@1 62
nuclear@1 63 do
nuclear@1 64 {
nuclear@1 65 c = UTF8Util::DecodeNextChar_Advance0(&buf);
nuclear@1 66 index--;
nuclear@1 67
nuclear@1 68 if (c == 0)
nuclear@1 69 {
nuclear@1 70 // We've hit the end of the string; don't go further.
nuclear@1 71 OVR_ASSERT(index == 0);
nuclear@1 72 return c;
nuclear@1 73 }
nuclear@1 74 } while (index >= 0);
nuclear@1 75
nuclear@1 76 return c;
nuclear@1 77 }
nuclear@1 78
nuclear@1 79 SPInt OVR_STDCALL GetByteIndex(SPInt index, const char *putf8str, SPInt length)
nuclear@1 80 {
nuclear@1 81 const char* buf = putf8str;
nuclear@1 82
nuclear@1 83 if (length != -1)
nuclear@1 84 {
nuclear@1 85 while ((buf - putf8str) < length && index > 0)
nuclear@1 86 {
nuclear@1 87 UTF8Util::DecodeNextChar_Advance0(&buf);
nuclear@1 88 index--;
nuclear@1 89 }
nuclear@1 90
nuclear@1 91 return buf-putf8str;
nuclear@1 92 }
nuclear@1 93
nuclear@1 94 while (index > 0)
nuclear@1 95 {
nuclear@1 96 UInt32 c = UTF8Util::DecodeNextChar_Advance0(&buf);
nuclear@1 97 index--;
nuclear@1 98
nuclear@1 99 if (c == 0)
nuclear@1 100 return buf-putf8str;
nuclear@1 101 };
nuclear@1 102
nuclear@1 103 return buf-putf8str;
nuclear@1 104 }
nuclear@1 105
nuclear@1 106 int OVR_STDCALL GetEncodeCharSize(UInt32 ucs_character)
nuclear@1 107 {
nuclear@1 108 if (ucs_character <= 0x7F)
nuclear@1 109 return 1;
nuclear@1 110 else if (ucs_character <= 0x7FF)
nuclear@1 111 return 2;
nuclear@1 112 else if (ucs_character <= 0xFFFF)
nuclear@1 113 return 3;
nuclear@1 114 else if (ucs_character <= 0x1FFFFF)
nuclear@1 115 return 4;
nuclear@1 116 else if (ucs_character <= 0x3FFFFFF)
nuclear@1 117 return 5;
nuclear@1 118 else if (ucs_character <= 0x7FFFFFFF)
nuclear@1 119 return 6;
nuclear@1 120 else
nuclear@1 121 return 0;
nuclear@1 122 }
nuclear@1 123
nuclear@1 124 UInt32 OVR_STDCALL DecodeNextChar_Advance0(const char** putf8Buffer)
nuclear@1 125 {
nuclear@1 126 UInt32 uc;
nuclear@1 127 char c;
nuclear@1 128
nuclear@1 129 // Security considerations:
nuclear@1 130 //
nuclear@1 131 // Changed, this is now only the case for DecodeNextChar:
nuclear@1 132 // - If we hit a zero byte, we want to return 0 without stepping
nuclear@1 133 // the buffer pointer past the 0. th
nuclear@1 134 //
nuclear@1 135 // If we hit an "overlong sequence"; i.e. a character encoded
nuclear@1 136 // in a longer multibyte string than is necessary, then we
nuclear@1 137 // need to discard the character. This is so attackers can't
nuclear@1 138 // disguise dangerous characters or character sequences --
nuclear@1 139 // there is only one valid encoding for each character.
nuclear@1 140 //
nuclear@1 141 // If we decode characters { 0xD800 .. 0xDFFF } or { 0xFFFE,
nuclear@1 142 // 0xFFFF } then we ignore them; they are not valid in UTF-8.
nuclear@1 143
nuclear@1 144 // This isn't actually an invalid character; it's a valid char that
nuclear@1 145 // looks like an inverted question mark.
nuclear@1 146 #define INVALID_CHAR 0x0FFFD
nuclear@1 147
nuclear@1 148 #define FIRST_BYTE(mask, shift) \
nuclear@1 149 uc = (c & (mask)) << (shift);
nuclear@1 150
nuclear@1 151 #define NEXT_BYTE(shift) \
nuclear@1 152 c = **putf8Buffer; \
nuclear@1 153 if (c == 0) return 0; /* end of buffer, do not advance */ \
nuclear@1 154 if ((c & 0xC0) != 0x80) return INVALID_CHAR; /* standard check */ \
nuclear@1 155 (*putf8Buffer)++; \
nuclear@1 156 uc |= (c & 0x3F) << shift;
nuclear@1 157
nuclear@1 158 c = **putf8Buffer;
nuclear@1 159 (*putf8Buffer)++;
nuclear@1 160 if (c == 0)
nuclear@1 161 return 0; // End of buffer.
nuclear@1 162
nuclear@1 163 if ((c & 0x80) == 0) return (UInt32) c; // Conventional 7-bit ASCII.
nuclear@1 164
nuclear@1 165 // Multi-byte sequences.
nuclear@1 166 if ((c & 0xE0) == 0xC0)
nuclear@1 167 {
nuclear@1 168 // Two-byte sequence.
nuclear@1 169 FIRST_BYTE(0x1F, 6);
nuclear@1 170 NEXT_BYTE(0);
nuclear@1 171 if (uc < 0x80) return INVALID_CHAR; // overlong
nuclear@1 172 return uc;
nuclear@1 173 }
nuclear@1 174 else if ((c & 0xF0) == 0xE0)
nuclear@1 175 {
nuclear@1 176 // Three-byte sequence.
nuclear@1 177 FIRST_BYTE(0x0F, 12);
nuclear@1 178 NEXT_BYTE(6);
nuclear@1 179 NEXT_BYTE(0);
nuclear@1 180 if (uc < 0x800) return INVALID_CHAR; // overlong
nuclear@1 181 // Not valid ISO 10646, but Flash requires these to work
nuclear@1 182 // see AS3 test e15_5_3_2_3 for String.fromCharCode().charCodeAt(0)
nuclear@1 183 // if (uc >= 0x0D800 && uc <= 0x0DFFF) return INVALID_CHAR;
nuclear@1 184 // if (uc == 0x0FFFE || uc == 0x0FFFF) return INVALID_CHAR; // not valid ISO 10646
nuclear@1 185 return uc;
nuclear@1 186 }
nuclear@1 187 else if ((c & 0xF8) == 0xF0)
nuclear@1 188 {
nuclear@1 189 // Four-byte sequence.
nuclear@1 190 FIRST_BYTE(0x07, 18);
nuclear@1 191 NEXT_BYTE(12);
nuclear@1 192 NEXT_BYTE(6);
nuclear@1 193 NEXT_BYTE(0);
nuclear@1 194 if (uc < 0x010000) return INVALID_CHAR; // overlong
nuclear@1 195 return uc;
nuclear@1 196 }
nuclear@1 197 else if ((c & 0xFC) == 0xF8)
nuclear@1 198 {
nuclear@1 199 // Five-byte sequence.
nuclear@1 200 FIRST_BYTE(0x03, 24);
nuclear@1 201 NEXT_BYTE(18);
nuclear@1 202 NEXT_BYTE(12);
nuclear@1 203 NEXT_BYTE(6);
nuclear@1 204 NEXT_BYTE(0);
nuclear@1 205 if (uc < 0x0200000) return INVALID_CHAR; // overlong
nuclear@1 206 return uc;
nuclear@1 207 }
nuclear@1 208 else if ((c & 0xFE) == 0xFC)
nuclear@1 209 {
nuclear@1 210 // Six-byte sequence.
nuclear@1 211 FIRST_BYTE(0x01, 30);
nuclear@1 212 NEXT_BYTE(24);
nuclear@1 213 NEXT_BYTE(18);
nuclear@1 214 NEXT_BYTE(12);
nuclear@1 215 NEXT_BYTE(6);
nuclear@1 216 NEXT_BYTE(0);
nuclear@1 217 if (uc < 0x04000000) return INVALID_CHAR; // overlong
nuclear@1 218 return uc;
nuclear@1 219 }
nuclear@1 220 else
nuclear@1 221 {
nuclear@1 222 // Invalid.
nuclear@1 223 return INVALID_CHAR;
nuclear@1 224 }
nuclear@1 225 }
nuclear@1 226
nuclear@1 227
nuclear@1 228 void OVR_STDCALL EncodeChar(char* pbuffer, SPInt* pindex, UInt32 ucs_character)
nuclear@1 229 {
nuclear@1 230 if (ucs_character <= 0x7F)
nuclear@1 231 {
nuclear@1 232 // Plain single-byte ASCII.
nuclear@1 233 pbuffer[(*pindex)++] = (char) ucs_character;
nuclear@1 234 }
nuclear@1 235 else if (ucs_character <= 0x7FF)
nuclear@1 236 {
nuclear@1 237 // Two bytes.
nuclear@1 238 pbuffer[(*pindex)++] = 0xC0 | (char)(ucs_character >> 6);
nuclear@1 239 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
nuclear@1 240 }
nuclear@1 241 else if (ucs_character <= 0xFFFF)
nuclear@1 242 {
nuclear@1 243 // Three bytes.
nuclear@1 244 pbuffer[(*pindex)++] = 0xE0 | (char)(ucs_character >> 12);
nuclear@1 245 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
nuclear@1 246 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
nuclear@1 247 }
nuclear@1 248 else if (ucs_character <= 0x1FFFFF)
nuclear@1 249 {
nuclear@1 250 // Four bytes.
nuclear@1 251 pbuffer[(*pindex)++] = 0xF0 | (char)(ucs_character >> 18);
nuclear@1 252 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
nuclear@1 253 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
nuclear@1 254 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
nuclear@1 255 }
nuclear@1 256 else if (ucs_character <= 0x3FFFFFF)
nuclear@1 257 {
nuclear@1 258 // Five bytes.
nuclear@1 259 pbuffer[(*pindex)++] = 0xF8 | (char)(ucs_character >> 24);
nuclear@1 260 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 18) & 0x3F);
nuclear@1 261 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
nuclear@1 262 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
nuclear@1 263 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
nuclear@1 264 }
nuclear@1 265 else if (ucs_character <= 0x7FFFFFFF)
nuclear@1 266 {
nuclear@1 267 // Six bytes.
nuclear@1 268 pbuffer[(*pindex)++] = 0xFC | (char)(ucs_character >> 30);
nuclear@1 269 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 24) & 0x3F);
nuclear@1 270 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 18) & 0x3F);
nuclear@1 271 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 12) & 0x3F);
nuclear@1 272 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 6) & 0x3F);
nuclear@1 273 pbuffer[(*pindex)++] = 0x80 | (char)((ucs_character >> 0) & 0x3F);
nuclear@1 274 }
nuclear@1 275 else
nuclear@1 276 {
nuclear@1 277 // Invalid char; don't encode anything.
nuclear@1 278 }
nuclear@1 279 }
nuclear@1 280
nuclear@1 281 SPInt OVR_STDCALL GetEncodeStringSize(const wchar_t* pchar, SPInt length)
nuclear@1 282 {
nuclear@1 283 SPInt len = 0;
nuclear@1 284 if (length != -1)
nuclear@1 285 for (int i = 0; i < length; i++)
nuclear@1 286 {
nuclear@1 287 len += GetEncodeCharSize(pchar[i]);
nuclear@1 288 }
nuclear@1 289 else
nuclear@1 290 for (int i = 0;; i++)
nuclear@1 291 {
nuclear@1 292 if (pchar[i] == 0)
nuclear@1 293 return len;
nuclear@1 294 len += GetEncodeCharSize(pchar[i]);
nuclear@1 295 }
nuclear@1 296 return len;
nuclear@1 297 }
nuclear@1 298
nuclear@1 299 void OVR_STDCALL EncodeString(char *pbuff, const wchar_t* pchar, SPInt length)
nuclear@1 300 {
nuclear@1 301 SPInt ofs = 0;
nuclear@1 302 if (length != -1)
nuclear@1 303 {
nuclear@1 304 for (int i = 0; i < length; i++)
nuclear@1 305 {
nuclear@1 306 EncodeChar(pbuff, &ofs, pchar[i]);
nuclear@1 307 }
nuclear@1 308 }
nuclear@1 309 else
nuclear@1 310 {
nuclear@1 311 for (int i = 0;; i++)
nuclear@1 312 {
nuclear@1 313 if (pchar[i] == 0)
nuclear@1 314 break;
nuclear@1 315 EncodeChar(pbuff, &ofs, pchar[i]);
nuclear@1 316 }
nuclear@1 317 }
nuclear@1 318 pbuff[ofs] = 0;
nuclear@1 319 }
nuclear@1 320
nuclear@1 321 UPInt OVR_STDCALL DecodeString(wchar_t *pbuff, const char* putf8str, SPInt bytesLen)
nuclear@1 322 {
nuclear@1 323 wchar_t *pbegin = pbuff;
nuclear@1 324 if (bytesLen == -1)
nuclear@1 325 {
nuclear@1 326 while (1)
nuclear@1 327 {
nuclear@1 328 UInt32 ch = DecodeNextChar_Advance0(&putf8str);
nuclear@1 329 if (ch == 0)
nuclear@1 330 break;
nuclear@1 331 else if (ch >= 0xFFFF)
nuclear@1 332 ch = 0xFFFD;
nuclear@1 333 *pbuff++ = wchar_t(ch);
nuclear@1 334 }
nuclear@1 335 }
nuclear@1 336 else
nuclear@1 337 {
nuclear@1 338 const char* p = putf8str;
nuclear@1 339 while ((p - putf8str) < bytesLen)
nuclear@1 340 {
nuclear@1 341 UInt32 ch = DecodeNextChar_Advance0(&p);
nuclear@1 342 if (ch >= 0xFFFF)
nuclear@1 343 ch = 0xFFFD;
nuclear@1 344 *pbuff++ = wchar_t(ch);
nuclear@1 345 }
nuclear@1 346 }
nuclear@1 347
nuclear@1 348 *pbuff = 0;
nuclear@1 349 return pbuff - pbegin;
nuclear@1 350 }
nuclear@1 351
nuclear@1 352
nuclear@1 353 #ifdef UTF8_UNIT_TEST
nuclear@1 354
nuclear@1 355 // Compile this test case with something like:
nuclear@1 356 //
nuclear@1 357 // gcc utf8.cpp -g -I.. -DUTF8_UNIT_TEST -lstdc++ -o utf8_test
nuclear@1 358 //
nuclear@1 359 // or
nuclear@1 360 //
nuclear@1 361 // cl utf8.cpp -Zi -Od -DUTF8_UNIT_TEST -I..
nuclear@1 362 //
nuclear@1 363 // If possible, try running the test program with the first arg
nuclear@1 364 // pointing at the file:
nuclear@1 365 //
nuclear@1 366 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
nuclear@1 367 //
nuclear@1 368 // and examine the results by eye to make sure they are acceptable to
nuclear@1 369 // you.
nuclear@1 370
nuclear@1 371
nuclear@1 372 #include "base/utility.h"
nuclear@1 373 #include <stdio.h>
nuclear@1 374
nuclear@1 375
nuclear@1 376 bool check_equal(const char* utf8_in, const UInt32* ucs_in)
nuclear@1 377 {
nuclear@1 378 for (;;)
nuclear@1 379 {
nuclear@1 380 UInt32 next_ucs = *ucs_in++;
nuclear@1 381 UInt32 next_ucs_from_utf8 = utf8::decode_next_unicode_character(&utf8_in);
nuclear@1 382 if (next_ucs != next_ucs_from_utf8)
nuclear@1 383 {
nuclear@1 384 return false;
nuclear@1 385 }
nuclear@1 386 if (next_ucs == 0)
nuclear@1 387 {
nuclear@1 388 OVR_ASSERT(next_ucs_from_utf8 == 0);
nuclear@1 389 break;
nuclear@1 390 }
nuclear@1 391 }
nuclear@1 392
nuclear@1 393 return true;
nuclear@1 394 }
nuclear@1 395
nuclear@1 396
// Test helper: prints a zero-terminated byte string to stdout. Newlines and
// bytes in the range 32..127 are printed as-is; every other byte is shown
// as <0xNN>.
void log_ascii(const char* line)
{
    for (const char* cursor = line; *cursor != 0; ++cursor)
    {
        const unsigned char byte = (unsigned char) *cursor;
        const bool printAsIs = (byte == '\n') || (byte >= 32 && byte <= 127);

        if (printAsIs)
        {
            printf("%c", byte);
        }
        else
        {
            // Non-printable as plain ASCII.
            printf("<0x%02X>", (int) byte);
        }
    }
}
nuclear@1 419
nuclear@1 420
nuclear@1 421 void log_ucs(const UInt32* line)
nuclear@1 422 {
nuclear@1 423 for (;;)
nuclear@1 424 {
nuclear@1 425 UInt32 uc = *line++;
nuclear@1 426 if (uc == 0)
nuclear@1 427 {
nuclear@1 428 // End of line.
nuclear@1 429 return;
nuclear@1 430 }
nuclear@1 431 else if (uc != '\n'
nuclear@1 432 && (uc < 32 || uc > 127))
nuclear@1 433 {
nuclear@1 434 // Non-printable as plain ASCII.
nuclear@1 435 printf("<U-%04X>", uc);
nuclear@1 436 }
nuclear@1 437 else
nuclear@1 438 {
nuclear@1 439 printf("%c", (char) uc);
nuclear@1 440 }
nuclear@1 441 }
nuclear@1 442 }
nuclear@1 443
nuclear@1 444
nuclear@1 445 // Simple canned test.
nuclear@1 446 int main(int argc, const char* argv[])
nuclear@1 447 {
nuclear@1 448 {
nuclear@1 449 const char* test8 = "Ignacio CastaƱo";
nuclear@1 450 const UInt32 test32[] =
nuclear@1 451 {
nuclear@1 452 0x49, 0x67, 0x6E, 0x61, 0x63,
nuclear@1 453 0x69, 0x6F, 0x20, 0x43, 0x61,
nuclear@1 454 0x73, 0x74, 0x61, 0xF1, 0x6F,
nuclear@1 455 0x00
nuclear@1 456 };
nuclear@1 457
nuclear@1 458 OVR_ASSERT(check_equal(test8, test32));
nuclear@1 459 }
nuclear@1 460
nuclear@1 461 // If user passed an arg, try reading the file as UTF-8 encoded text.
nuclear@1 462 if (argc > 1)
nuclear@1 463 {
nuclear@1 464 const char* filename = argv[1];
nuclear@1 465 FILE* fp = fopen(filename, "rb");
nuclear@1 466 if (fp == NULL)
nuclear@1 467 {
nuclear@1 468 printf("Can't open file '%s'\n", filename);
nuclear@1 469 return 1;
nuclear@1 470 }
nuclear@1 471
nuclear@1 472 // Read lines from the file, encode/decode them, and highlight discrepancies.
nuclear@1 473 const int LINE_SIZE = 200; // max line size
nuclear@1 474 char line_buffer_utf8[LINE_SIZE];
nuclear@1 475 char reencoded_utf8[6 * LINE_SIZE];
nuclear@1 476 UInt32 line_buffer_ucs[LINE_SIZE];
nuclear@1 477
nuclear@1 478 int byte_counter = 0;
nuclear@1 479 for (;;)
nuclear@1 480 {
nuclear@1 481 int c = fgetc(fp);
nuclear@1 482 if (c == EOF)
nuclear@1 483 {
nuclear@1 484 // Done.
nuclear@1 485 break;
nuclear@1 486 }
nuclear@1 487 line_buffer_utf8[byte_counter++] = c;
nuclear@1 488 if (c == '\n' || byte_counter >= LINE_SIZE - 2)
nuclear@1 489 {
nuclear@1 490 // End of line. Process the line.
nuclear@1 491 line_buffer_utf8[byte_counter++] = 0; // terminate.
nuclear@1 492
nuclear@1 493 // Decode into UCS.
nuclear@1 494 const char* p = line_buffer_utf8;
nuclear@1 495 UInt32* q = line_buffer_ucs;
nuclear@1 496 for (;;)
nuclear@1 497 {
nuclear@1 498 UInt32 uc = UTF8Util::DecodeNextChar(&p);
nuclear@1 499 *q++ = uc;
nuclear@1 500
nuclear@1 501 OVR_ASSERT(q < line_buffer_ucs + LINE_SIZE);
nuclear@1 502 OVR_ASSERT(p < line_buffer_utf8 + LINE_SIZE);
nuclear@1 503
nuclear@1 504 if (uc == 0) break;
nuclear@1 505 }
nuclear@1 506
nuclear@1 507 // Encode back into UTF-8.
nuclear@1 508 q = line_buffer_ucs;
nuclear@1 509 int index = 0;
nuclear@1 510 for (;;)
nuclear@1 511 {
nuclear@1 512 UInt32 uc = *q++;
nuclear@1 513 OVR_ASSERT(index < LINE_SIZE * 6 - 6);
nuclear@1 514 int last_index = index;
nuclear@1 515 UTF8Util::EncodeChar(reencoded_utf8, &index, uc);
nuclear@1 516 OVR_ASSERT(index <= last_index + 6);
nuclear@1 517 if (uc == 0) break;
nuclear@1 518 }
nuclear@1 519
nuclear@1 520 // This can be useful for debugging.
nuclear@1 521 #if 0
nuclear@1 522 // Show the UCS and the re-encoded UTF-8.
nuclear@1 523 log_ucs(line_buffer_ucs);
nuclear@1 524 log_ascii(reencoded_utf8);
nuclear@1 525 #endif // 0
nuclear@1 526
nuclear@1 527 OVR_ASSERT(check_equal(line_buffer_utf8, line_buffer_ucs));
nuclear@1 528 OVR_ASSERT(check_equal(reencoded_utf8, line_buffer_ucs));
nuclear@1 529
nuclear@1 530 // Start next line.
nuclear@1 531 byte_counter = 0;
nuclear@1 532 }
nuclear@1 533 }
nuclear@1 534
nuclear@1 535 fclose(fp);
nuclear@1 536 }
nuclear@1 537
nuclear@1 538 return 0;
nuclear@1 539 }
nuclear@1 540
nuclear@1 541
nuclear@1 542 #endif // UTF8_UNIT_TEST
nuclear@1 543
nuclear@1 544 }} // namespace UTF8Util::OVR
nuclear@1 545