vrshoot

annotate libs/assimp/irrXML/CXMLReaderImpl.h @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
rev   line source
nuclear@0 1 // Copyright (C) 2002-2005 Nikolaus Gebhardt
nuclear@0 2 // This file is part of the "Irrlicht Engine" and the "irrXML" project.
nuclear@0 3 // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
nuclear@0 4
nuclear@0 5 #ifndef __ICXML_READER_IMPL_H_INCLUDED__
nuclear@0 6 #define __ICXML_READER_IMPL_H_INCLUDED__
nuclear@0 7
nuclear@0 8 #include "irrXML.h"
nuclear@0 9 #include "irrString.h"
nuclear@0 10 #include "irrArray.h"
nuclear@0 11
nuclear@0 12 using namespace Assimp;
nuclear@0 13
// Debug-only trace helper: prints the given C string in _DEBUG builds and
// expands to nothing otherwise. The text is handed to printf as an argument
// of a fixed "%s" format, so '%' characters inside it are printed literally
// instead of being interpreted as conversion specifiers.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf("%s", (x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
nuclear@0 19
nuclear@0 20
nuclear@0 21 namespace irr
nuclear@0 22 {
nuclear@0 23 namespace io
nuclear@0 24 {
nuclear@0 25
nuclear@0 26
nuclear@0 27 //! implementation of the IrrXMLReader
nuclear@0 28 template<class char_type, class superclass>
nuclear@0 29 class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
nuclear@0 30 {
nuclear@0 31 public:
nuclear@0 32
nuclear@0 33 //! Constructor
nuclear@0 34 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
nuclear@0 35 : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
nuclear@0 36 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)
nuclear@0 37 {
nuclear@0 38 if (!callback)
nuclear@0 39 return;
nuclear@0 40
nuclear@0 41 storeTargetFormat();
nuclear@0 42
nuclear@0 43 // read whole xml file
nuclear@0 44
nuclear@0 45 readFile(callback);
nuclear@0 46
nuclear@0 47 // clean up
nuclear@0 48
nuclear@0 49 if (deleteCallBack)
nuclear@0 50 delete callback;
nuclear@0 51
nuclear@0 52 // create list with special characters
nuclear@0 53
nuclear@0 54 createSpecialCharacterList();
nuclear@0 55
nuclear@0 56 // set pointer to text begin
nuclear@0 57 P = TextBegin;
nuclear@0 58 }
nuclear@0 59
nuclear@0 60
//! Destructor.
//! Releases the text buffer referenced by TextData.
virtual ~CXMLReaderImpl()
{
    delete [] TextData;
}
nuclear@0 66
nuclear@0 67
nuclear@0 68 //! Reads forward to the next xml node.
nuclear@0 69 //! \return Returns false, if there was no further node.
nuclear@0 70 virtual bool read()
nuclear@0 71 {
nuclear@0 72 // if not end reached, parse the node
nuclear@0 73 if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)
nuclear@0 74 {
nuclear@0 75 parseCurrentNode();
nuclear@0 76 return true;
nuclear@0 77 }
nuclear@0 78
nuclear@0 79 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
nuclear@0 80 return false;
nuclear@0 81 }
nuclear@0 82
nuclear@0 83
//! Returns the type of the current XML node, i.e. the type stored by the
//! most recent parse step (EXN_NONE right after construction).
virtual EXML_NODE getNodeType() const
{
    return CurrentNodeType;
}
nuclear@0 89
nuclear@0 90
nuclear@0 91 //! Returns attribute count of the current XML node.
nuclear@0 92 virtual int getAttributeCount() const
nuclear@0 93 {
nuclear@0 94 return Attributes.size();
nuclear@0 95 }
nuclear@0 96
nuclear@0 97
nuclear@0 98 //! Returns name of an attribute.
nuclear@0 99 virtual const char_type* getAttributeName(int idx) const
nuclear@0 100 {
nuclear@0 101 if (idx < 0 || idx >= (int)Attributes.size())
nuclear@0 102 return 0;
nuclear@0 103
nuclear@0 104 return Attributes[idx].Name.c_str();
nuclear@0 105 }
nuclear@0 106
nuclear@0 107
nuclear@0 108 //! Returns the value of an attribute.
nuclear@0 109 virtual const char_type* getAttributeValue(int idx) const
nuclear@0 110 {
nuclear@0 111 if (idx < 0 || idx >= (int)Attributes.size())
nuclear@0 112 return 0;
nuclear@0 113
nuclear@0 114 return Attributes[idx].Value.c_str();
nuclear@0 115 }
nuclear@0 116
nuclear@0 117
nuclear@0 118 //! Returns the value of an attribute.
nuclear@0 119 virtual const char_type* getAttributeValue(const char_type* name) const
nuclear@0 120 {
nuclear@0 121 const SAttribute* attr = getAttributeByName(name);
nuclear@0 122 if (!attr)
nuclear@0 123 return 0;
nuclear@0 124
nuclear@0 125 return attr->Value.c_str();
nuclear@0 126 }
nuclear@0 127
nuclear@0 128
nuclear@0 129 //! Returns the value of an attribute
nuclear@0 130 virtual const char_type* getAttributeValueSafe(const char_type* name) const
nuclear@0 131 {
nuclear@0 132 const SAttribute* attr = getAttributeByName(name);
nuclear@0 133 if (!attr)
nuclear@0 134 return EmptyString.c_str();
nuclear@0 135
nuclear@0 136 return attr->Value.c_str();
nuclear@0 137 }
nuclear@0 138
nuclear@0 139
nuclear@0 140
nuclear@0 141 //! Returns the value of an attribute as integer.
nuclear@0 142 int getAttributeValueAsInt(const char_type* name) const
nuclear@0 143 {
nuclear@0 144 return (int)getAttributeValueAsFloat(name);
nuclear@0 145 }
nuclear@0 146
nuclear@0 147
nuclear@0 148 //! Returns the value of an attribute as integer.
nuclear@0 149 int getAttributeValueAsInt(int idx) const
nuclear@0 150 {
nuclear@0 151 return (int)getAttributeValueAsFloat(idx);
nuclear@0 152 }
nuclear@0 153
nuclear@0 154
nuclear@0 155 //! Returns the value of an attribute as float.
nuclear@0 156 float getAttributeValueAsFloat(const char_type* name) const
nuclear@0 157 {
nuclear@0 158 const SAttribute* attr = getAttributeByName(name);
nuclear@0 159 if (!attr)
nuclear@0 160 return 0;
nuclear@0 161
nuclear@0 162 core::stringc c = attr->Value.c_str();
nuclear@0 163 return fast_atof(c.c_str());
nuclear@0 164 }
nuclear@0 165
nuclear@0 166
nuclear@0 167 //! Returns the value of an attribute as float.
nuclear@0 168 float getAttributeValueAsFloat(int idx) const
nuclear@0 169 {
nuclear@0 170 const char_type* attrvalue = getAttributeValue(idx);
nuclear@0 171 if (!attrvalue)
nuclear@0 172 return 0;
nuclear@0 173
nuclear@0 174 core::stringc c = attrvalue;
nuclear@0 175 return fast_atof(c.c_str());
nuclear@0 176 }
nuclear@0 177
nuclear@0 178
//! Returns the name of the current node.
//! The pointer refers to the NodeName member, which is overwritten by later
//! parse steps.
virtual const char_type* getNodeName() const
{
    return NodeName.c_str();
}
nuclear@0 184
nuclear@0 185
//! Returns data of the current node.
//! Text, comment and CDATA contents are stored in the NodeName member by the
//! parser, so this returns the same string as getNodeName().
virtual const char_type* getNodeData() const
{
    return NodeName.c_str();
}
nuclear@0 191
nuclear@0 192
//! Returns if an element is an empty element, like <foo />.
//! The flag is set while parsing an opening tag and reset to false when a
//! closing tag is parsed.
virtual bool isEmptyElement() const
{
    return IsEmptyElement;
}
nuclear@0 198
//! Returns format of the source xml file, as detected from the byte order
//! mark in readFile().
virtual ETEXT_FORMAT getSourceFormat() const
{
    return SourceFormat;
}
nuclear@0 204
//! Returns format of the strings returned by the parser, as chosen from
//! sizeof(char_type) in storeTargetFormat().
virtual ETEXT_FORMAT getParserFormat() const
{
    return TargetFormat;
}
nuclear@0 210
nuclear@0 211 private:
nuclear@0 212
nuclear@0 213 // Reads the current xml node
nuclear@0 214 void parseCurrentNode()
nuclear@0 215 {
nuclear@0 216 char_type* start = P;
nuclear@0 217
nuclear@0 218 // more forward until '<' found
nuclear@0 219 while(*P != L'<' && *P)
nuclear@0 220 ++P;
nuclear@0 221
nuclear@0 222 if (!*P)
nuclear@0 223 return;
nuclear@0 224
nuclear@0 225 if (P - start > 0)
nuclear@0 226 {
nuclear@0 227 // we found some text, store it
nuclear@0 228 if (setText(start, P))
nuclear@0 229 return;
nuclear@0 230 }
nuclear@0 231
nuclear@0 232 ++P;
nuclear@0 233
nuclear@0 234 // based on current token, parse and report next element
nuclear@0 235 switch(*P)
nuclear@0 236 {
nuclear@0 237 case L'/':
nuclear@0 238 parseClosingXMLElement();
nuclear@0 239 break;
nuclear@0 240 case L'?':
nuclear@0 241 ignoreDefinition();
nuclear@0 242 break;
nuclear@0 243 case L'!':
nuclear@0 244 if (!parseCDATA())
nuclear@0 245 parseComment();
nuclear@0 246 break;
nuclear@0 247 default:
nuclear@0 248 parseOpeningXMLElement();
nuclear@0 249 break;
nuclear@0 250 }
nuclear@0 251 }
nuclear@0 252
nuclear@0 253
nuclear@0 254 //! sets the state that text was found. Returns true if set should be set
nuclear@0 255 bool setText(char_type* start, char_type* end)
nuclear@0 256 {
nuclear@0 257 // check if text is more than 2 characters, and if not, check if there is
nuclear@0 258 // only white space, so that this text won't be reported
nuclear@0 259 if (end - start < 3)
nuclear@0 260 {
nuclear@0 261 char_type* p = start;
nuclear@0 262 for(; p != end; ++p)
nuclear@0 263 if (!isWhiteSpace(*p))
nuclear@0 264 break;
nuclear@0 265
nuclear@0 266 if (p == end)
nuclear@0 267 return false;
nuclear@0 268 }
nuclear@0 269
nuclear@0 270 // set current text to the parsed text, and replace xml special characters
nuclear@0 271 core::string<char_type> s(start, (int)(end - start));
nuclear@0 272 NodeName = replaceSpecialCharacters(s);
nuclear@0 273
nuclear@0 274 // current XML node type is text
nuclear@0 275 CurrentNodeType = EXN_TEXT;
nuclear@0 276
nuclear@0 277 return true;
nuclear@0 278 }
nuclear@0 279
nuclear@0 280
nuclear@0 281
nuclear@0 282 //! ignores an xml definition like <?xml something />
nuclear@0 283 void ignoreDefinition()
nuclear@0 284 {
nuclear@0 285 CurrentNodeType = EXN_UNKNOWN;
nuclear@0 286
nuclear@0 287 // move until end marked with '>' reached
nuclear@0 288 while(*P != L'>')
nuclear@0 289 ++P;
nuclear@0 290
nuclear@0 291 ++P;
nuclear@0 292 }
nuclear@0 293
nuclear@0 294
nuclear@0 295 //! parses a comment
nuclear@0 296 void parseComment()
nuclear@0 297 {
nuclear@0 298 CurrentNodeType = EXN_COMMENT;
nuclear@0 299 P += 1;
nuclear@0 300
nuclear@0 301 char_type *pCommentBegin = P;
nuclear@0 302
nuclear@0 303 int count = 1;
nuclear@0 304
nuclear@0 305 // move until end of comment reached
nuclear@0 306 while(count)
nuclear@0 307 {
nuclear@0 308 if (*P == L'>')
nuclear@0 309 --count;
nuclear@0 310 else
nuclear@0 311 if (*P == L'<')
nuclear@0 312 ++count;
nuclear@0 313
nuclear@0 314 ++P;
nuclear@0 315 }
nuclear@0 316
nuclear@0 317 P -= 3;
nuclear@0 318 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
nuclear@0 319 P += 3;
nuclear@0 320 }
nuclear@0 321
nuclear@0 322
nuclear@0 323 //! parses an opening xml element and reads attributes
nuclear@0 324 void parseOpeningXMLElement()
nuclear@0 325 {
nuclear@0 326 CurrentNodeType = EXN_ELEMENT;
nuclear@0 327 IsEmptyElement = false;
nuclear@0 328 Attributes.clear();
nuclear@0 329
nuclear@0 330 // find name
nuclear@0 331 const char_type* startName = P;
nuclear@0 332
nuclear@0 333 // find end of element
nuclear@0 334 while(*P != L'>' && !isWhiteSpace(*P))
nuclear@0 335 ++P;
nuclear@0 336
nuclear@0 337 const char_type* endName = P;
nuclear@0 338
nuclear@0 339 // find Attributes
nuclear@0 340 while(*P != L'>')
nuclear@0 341 {
nuclear@0 342 if (isWhiteSpace(*P))
nuclear@0 343 ++P;
nuclear@0 344 else
nuclear@0 345 {
nuclear@0 346 if (*P != L'/')
nuclear@0 347 {
nuclear@0 348 // we've got an attribute
nuclear@0 349
nuclear@0 350 // read the attribute names
nuclear@0 351 const char_type* attributeNameBegin = P;
nuclear@0 352
nuclear@0 353 while(!isWhiteSpace(*P) && *P != L'=')
nuclear@0 354 ++P;
nuclear@0 355
nuclear@0 356 const char_type* attributeNameEnd = P;
nuclear@0 357 ++P;
nuclear@0 358
nuclear@0 359 // read the attribute value
nuclear@0 360 // check for quotes and single quotes, thx to murphy
nuclear@0 361 while( (*P != L'\"') && (*P != L'\'') && *P)
nuclear@0 362 ++P;
nuclear@0 363
nuclear@0 364 if (!*P) // malformatted xml file
nuclear@0 365 return;
nuclear@0 366
nuclear@0 367 const char_type attributeQuoteChar = *P;
nuclear@0 368
nuclear@0 369 ++P;
nuclear@0 370 const char_type* attributeValueBegin = P;
nuclear@0 371
nuclear@0 372 while(*P != attributeQuoteChar && *P)
nuclear@0 373 ++P;
nuclear@0 374
nuclear@0 375 if (!*P) // malformatted xml file
nuclear@0 376 return;
nuclear@0 377
nuclear@0 378 const char_type* attributeValueEnd = P;
nuclear@0 379 ++P;
nuclear@0 380
nuclear@0 381 SAttribute attr;
nuclear@0 382 attr.Name = core::string<char_type>(attributeNameBegin,
nuclear@0 383 (int)(attributeNameEnd - attributeNameBegin));
nuclear@0 384
nuclear@0 385 core::string<char_type> s(attributeValueBegin,
nuclear@0 386 (int)(attributeValueEnd - attributeValueBegin));
nuclear@0 387
nuclear@0 388 attr.Value = replaceSpecialCharacters(s);
nuclear@0 389 Attributes.push_back(attr);
nuclear@0 390 }
nuclear@0 391 else
nuclear@0 392 {
nuclear@0 393 // tag is closed directly
nuclear@0 394 ++P;
nuclear@0 395 IsEmptyElement = true;
nuclear@0 396 break;
nuclear@0 397 }
nuclear@0 398 }
nuclear@0 399 }
nuclear@0 400
nuclear@0 401 // check if this tag is closing directly
nuclear@0 402 if (endName > startName && *(endName-1) == L'/')
nuclear@0 403 {
nuclear@0 404 // directly closing tag
nuclear@0 405 IsEmptyElement = true;
nuclear@0 406 endName--;
nuclear@0 407 }
nuclear@0 408
nuclear@0 409 NodeName = core::string<char_type>(startName, (int)(endName - startName));
nuclear@0 410
nuclear@0 411 ++P;
nuclear@0 412 }
nuclear@0 413
nuclear@0 414
nuclear@0 415 //! parses an closing xml tag
nuclear@0 416 void parseClosingXMLElement()
nuclear@0 417 {
nuclear@0 418 CurrentNodeType = EXN_ELEMENT_END;
nuclear@0 419 IsEmptyElement = false;
nuclear@0 420 Attributes.clear();
nuclear@0 421
nuclear@0 422 ++P;
nuclear@0 423 const char_type* pBeginClose = P;
nuclear@0 424
nuclear@0 425 while(*P != L'>')
nuclear@0 426 ++P;
nuclear@0 427
nuclear@0 428 // remove trailing whitespace, if any
nuclear@0 429 while( isspace( P[-1]))
nuclear@0 430 --P;
nuclear@0 431
nuclear@0 432 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
nuclear@0 433 ++P;
nuclear@0 434 }
nuclear@0 435
nuclear@0 436 //! parses a possible CDATA section, returns false if begin was not a CDATA section
nuclear@0 437 bool parseCDATA()
nuclear@0 438 {
nuclear@0 439 if (*(P+1) != L'[')
nuclear@0 440 return false;
nuclear@0 441
nuclear@0 442 CurrentNodeType = EXN_CDATA;
nuclear@0 443
nuclear@0 444 // skip '<![CDATA['
nuclear@0 445 int count=0;
nuclear@0 446 while( *P && count<8 )
nuclear@0 447 {
nuclear@0 448 ++P;
nuclear@0 449 ++count;
nuclear@0 450 }
nuclear@0 451
nuclear@0 452 if (!*P)
nuclear@0 453 return true;
nuclear@0 454
nuclear@0 455 char_type *cDataBegin = P;
nuclear@0 456 char_type *cDataEnd = 0;
nuclear@0 457
nuclear@0 458 // find end of CDATA
nuclear@0 459 while(*P && !cDataEnd)
nuclear@0 460 {
nuclear@0 461 if (*P == L'>' &&
nuclear@0 462 (*(P-1) == L']') &&
nuclear@0 463 (*(P-2) == L']'))
nuclear@0 464 {
nuclear@0 465 cDataEnd = P - 2;
nuclear@0 466 }
nuclear@0 467
nuclear@0 468 ++P;
nuclear@0 469 }
nuclear@0 470
nuclear@0 471 if ( cDataEnd )
nuclear@0 472 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
nuclear@0 473 else
nuclear@0 474 NodeName = "";
nuclear@0 475
nuclear@0 476 return true;
nuclear@0 477 }
nuclear@0 478
nuclear@0 479
// structure for storing one attribute as a name/value pair
struct SAttribute
{
    core::string<char_type> Name;   // attribute name as written in the tag
    core::string<char_type> Value;  // attribute value with xml special characters replaced
};
nuclear@0 486
nuclear@0 487 // finds a current attribute by name, returns 0 if not found
nuclear@0 488 const SAttribute* getAttributeByName(const char_type* name) const
nuclear@0 489 {
nuclear@0 490 if (!name)
nuclear@0 491 return 0;
nuclear@0 492
nuclear@0 493 core::string<char_type> n = name;
nuclear@0 494
nuclear@0 495 for (int i=0; i<(int)Attributes.size(); ++i)
nuclear@0 496 if (Attributes[i].Name == n)
nuclear@0 497 return &Attributes[i];
nuclear@0 498
nuclear@0 499 return 0;
nuclear@0 500 }
nuclear@0 501
nuclear@0 502 // replaces xml special characters in a string and creates a new one
nuclear@0 503 core::string<char_type> replaceSpecialCharacters(
nuclear@0 504 core::string<char_type>& origstr)
nuclear@0 505 {
nuclear@0 506 int pos = origstr.findFirst(L'&');
nuclear@0 507 int oldPos = 0;
nuclear@0 508
nuclear@0 509 if (pos == -1)
nuclear@0 510 return origstr;
nuclear@0 511
nuclear@0 512 core::string<char_type> newstr;
nuclear@0 513
nuclear@0 514 while(pos != -1 && pos < origstr.size()-2)
nuclear@0 515 {
nuclear@0 516 // check if it is one of the special characters
nuclear@0 517
nuclear@0 518 int specialChar = -1;
nuclear@0 519 for (int i=0; i<(int)SpecialCharacters.size(); ++i)
nuclear@0 520 {
nuclear@0 521 const char_type* p = &origstr.c_str()[pos]+1;
nuclear@0 522
nuclear@0 523 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
nuclear@0 524 {
nuclear@0 525 specialChar = i;
nuclear@0 526 break;
nuclear@0 527 }
nuclear@0 528 }
nuclear@0 529
nuclear@0 530 if (specialChar != -1)
nuclear@0 531 {
nuclear@0 532 newstr.append(origstr.subString(oldPos, pos - oldPos));
nuclear@0 533 newstr.append(SpecialCharacters[specialChar][0]);
nuclear@0 534 pos += SpecialCharacters[specialChar].size();
nuclear@0 535 }
nuclear@0 536 else
nuclear@0 537 {
nuclear@0 538 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
nuclear@0 539 pos += 1;
nuclear@0 540 }
nuclear@0 541
nuclear@0 542 // find next &
nuclear@0 543 oldPos = pos;
nuclear@0 544 pos = origstr.findNext(L'&', pos);
nuclear@0 545 }
nuclear@0 546
nuclear@0 547 if (oldPos < origstr.size()-1)
nuclear@0 548 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
nuclear@0 549
nuclear@0 550 return newstr;
nuclear@0 551 }
nuclear@0 552
nuclear@0 553
nuclear@0 554
nuclear@0 555 //! reads the xml file and converts it into the wanted character format.
nuclear@0 556 bool readFile(IFileReadCallBack* callback)
nuclear@0 557 {
nuclear@0 558 int size = callback->getSize();
nuclear@0 559 size += 4; // We need two terminating 0's at the end.
nuclear@0 560 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
nuclear@0 561
nuclear@0 562 char* data8 = new char[size];
nuclear@0 563
nuclear@0 564 if (!callback->read(data8, size-4))
nuclear@0 565 {
nuclear@0 566 delete [] data8;
nuclear@0 567 return false;
nuclear@0 568 }
nuclear@0 569
nuclear@0 570 // add zeros at end
nuclear@0 571
nuclear@0 572 data8[size-1] = 0;
nuclear@0 573 data8[size-2] = 0;
nuclear@0 574 data8[size-3] = 0;
nuclear@0 575 data8[size-4] = 0;
nuclear@0 576
nuclear@0 577 char16* data16 = reinterpret_cast<char16*>(data8);
nuclear@0 578 char32* data32 = reinterpret_cast<char32*>(data8);
nuclear@0 579
nuclear@0 580 // now we need to convert the data to the desired target format
nuclear@0 581 // based on the byte order mark.
nuclear@0 582
nuclear@0 583 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
nuclear@0 584 const int UTF16_BE = 0xFFFE;
nuclear@0 585 const int UTF16_LE = 0xFEFF;
nuclear@0 586 const int UTF32_BE = 0xFFFE0000;
nuclear@0 587 const int UTF32_LE = 0x0000FEFF;
nuclear@0 588
nuclear@0 589 // check source for all utf versions and convert to target data format
nuclear@0 590
nuclear@0 591 if (size >= 4 && data32[0] == (char32)UTF32_BE)
nuclear@0 592 {
nuclear@0 593 // UTF-32, big endian
nuclear@0 594 SourceFormat = ETF_UTF32_BE;
nuclear@0 595 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
nuclear@0 596 }
nuclear@0 597 else
nuclear@0 598 if (size >= 4 && data32[0] == (char32)UTF32_LE)
nuclear@0 599 {
nuclear@0 600 // UTF-32, little endian
nuclear@0 601 SourceFormat = ETF_UTF32_LE;
nuclear@0 602 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
nuclear@0 603 }
nuclear@0 604 else
nuclear@0 605 if (size >= 2 && data16[0] == UTF16_BE)
nuclear@0 606 {
nuclear@0 607 // UTF-16, big endian
nuclear@0 608 SourceFormat = ETF_UTF16_BE;
nuclear@0 609 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
nuclear@0 610 }
nuclear@0 611 else
nuclear@0 612 if (size >= 2 && data16[0] == UTF16_LE)
nuclear@0 613 {
nuclear@0 614 // UTF-16, little endian
nuclear@0 615 SourceFormat = ETF_UTF16_LE;
nuclear@0 616 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
nuclear@0 617 }
nuclear@0 618 else
nuclear@0 619 if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])
nuclear@0 620 {
nuclear@0 621 // UTF-8
nuclear@0 622 SourceFormat = ETF_UTF8;
nuclear@0 623 convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header
nuclear@0 624 }
nuclear@0 625 else
nuclear@0 626 {
nuclear@0 627 // ASCII
nuclear@0 628 SourceFormat = ETF_ASCII;
nuclear@0 629 convertTextData(data8, data8, size);
nuclear@0 630 }
nuclear@0 631
nuclear@0 632 return true;
nuclear@0 633 }
nuclear@0 634
nuclear@0 635
nuclear@0 636 //! converts the text file into the desired format.
nuclear@0 637 //! \param source: begin of the text (without byte order mark)
nuclear@0 638 //! \param pointerToStore: pointer to text data block which can be
nuclear@0 639 //! stored or deleted based on the nesessary conversion.
nuclear@0 640 //! \param sizeWithoutHeader: Text size in characters without header
nuclear@0 641 template<class src_char_type>
nuclear@0 642 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
nuclear@0 643 {
nuclear@0 644 // convert little to big endian if necessary
nuclear@0 645 if (sizeof(src_char_type) > 1 &&
nuclear@0 646 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
nuclear@0 647 convertToLittleEndian(source);
nuclear@0 648
nuclear@0 649 // check if conversion is necessary:
nuclear@0 650 if (sizeof(src_char_type) == sizeof(char_type))
nuclear@0 651 {
nuclear@0 652 // no need to convert
nuclear@0 653 TextBegin = (char_type*)source;
nuclear@0 654 TextData = (char_type*)pointerToStore;
nuclear@0 655 TextSize = sizeWithoutHeader;
nuclear@0 656 }
nuclear@0 657 else
nuclear@0 658 {
nuclear@0 659 // convert source into target data format.
nuclear@0 660 // TODO: implement a real conversion. This one just
nuclear@0 661 // copies bytes. This is a problem when there are
nuclear@0 662 // unicode symbols using more than one character.
nuclear@0 663
nuclear@0 664 TextData = new char_type[sizeWithoutHeader];
nuclear@0 665
nuclear@0 666 // MSVC debugger complains here about loss of data ...
nuclear@0 667
nuclear@0 668
nuclear@0 669 // FIXME - gcc complains about 'shift width larger than width of type'
nuclear@0 670 // for T == unsigned long. Avoid it by messing around volatile ..
nuclear@0 671 volatile unsigned int c = 3;
nuclear@0 672 const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)<<c)) - 1));
nuclear@0 673 for (int i=0; i<sizeWithoutHeader; ++i)
nuclear@0 674 TextData[i] = char_type( source[i] & cc);
nuclear@0 675
nuclear@0 676 TextBegin = TextData;
nuclear@0 677 TextSize = sizeWithoutHeader;
nuclear@0 678
nuclear@0 679 // delete original data because no longer needed
nuclear@0 680 delete [] pointerToStore;
nuclear@0 681 }
nuclear@0 682 }
nuclear@0 683
//! Swaps the byte order of every character in a zero terminated buffer.
//! Characters of size 4 get all four bytes reversed; every other size is
//! treated as 16 bit and gets its two bytes exchanged. Processing stops at
//! the first zero character.
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    const bool is32Bit = sizeof(src_char_type) == 4;

    if (!is32Bit)
    {
        // 16 bit
        for (; *t; ++t)
            *t = (*t >> 8) | (*t << 8);

        return;
    }

    // 32 bit
    for (; *t; ++t)
    {
        *t = ((*t & 0xff000000) >> 24) |
             ((*t & 0x00ff0000) >> 8) |
             ((*t & 0x0000ff00) << 8) |
             ((*t & 0x000000ff) << 24);
    }
}
nuclear@0 712
nuclear@0 713 //! returns if a format is little endian
nuclear@0 714 inline bool isLittleEndian(ETEXT_FORMAT f)
nuclear@0 715 {
nuclear@0 716 return f == ETF_ASCII ||
nuclear@0 717 f == ETF_UTF8 ||
nuclear@0 718 f == ETF_UTF16_LE ||
nuclear@0 719 f == ETF_UTF32_LE;
nuclear@0 720 }
nuclear@0 721
nuclear@0 722
nuclear@0 723 //! returns true if a character is whitespace
nuclear@0 724 inline bool isWhiteSpace(char_type c)
nuclear@0 725 {
nuclear@0 726 return (c==' ' || c=='\t' || c=='\n' || c=='\r');
nuclear@0 727 }
nuclear@0 728
nuclear@0 729
nuclear@0 730 //! generates a list with xml special characters
nuclear@0 731 void createSpecialCharacterList()
nuclear@0 732 {
nuclear@0 733 // list of strings containing special symbols,
nuclear@0 734 // the first character is the special character,
nuclear@0 735 // the following is the symbol string without trailing &.
nuclear@0 736
nuclear@0 737 SpecialCharacters.push_back("&amp;");
nuclear@0 738 SpecialCharacters.push_back("<lt;");
nuclear@0 739 SpecialCharacters.push_back(">gt;");
nuclear@0 740 SpecialCharacters.push_back("\"quot;");
nuclear@0 741 SpecialCharacters.push_back("'apos;");
nuclear@0 742
nuclear@0 743 }
nuclear@0 744
nuclear@0 745
nuclear@0 746 //! compares the first n characters of the strings
nuclear@0 747 bool equalsn(const char_type* str1, const char_type* str2, int len)
nuclear@0 748 {
nuclear@0 749 int i;
nuclear@0 750 for(i=0; str1[i] && str2[i] && i < len; ++i)
nuclear@0 751 if (str1[i] != str2[i])
nuclear@0 752 return false;
nuclear@0 753
nuclear@0 754 // if one (or both) of the strings was smaller then they
nuclear@0 755 // are only equal if they have the same lenght
nuclear@0 756 return (i == len) || (str1[i] == 0 && str2[i] == 0);
nuclear@0 757 }
nuclear@0 758
nuclear@0 759
nuclear@0 760 //! stores the target text format
nuclear@0 761 void storeTargetFormat()
nuclear@0 762 {
nuclear@0 763 // get target format. We could have done this using template specialization,
nuclear@0 764 // but VisualStudio 6 don't like it and we want to support it.
nuclear@0 765
nuclear@0 766 switch(sizeof(char_type))
nuclear@0 767 {
nuclear@0 768 case 1:
nuclear@0 769 TargetFormat = ETF_UTF8;
nuclear@0 770 break;
nuclear@0 771 case 2:
nuclear@0 772 TargetFormat = ETF_UTF16_LE;
nuclear@0 773 break;
nuclear@0 774 case 4:
nuclear@0 775 TargetFormat = ETF_UTF32_LE;
nuclear@0 776 break;
nuclear@0 777 default:
nuclear@0 778 TargetFormat = ETF_ASCII; // should never happen.
nuclear@0 779 }
nuclear@0 780 }
nuclear@0 781
nuclear@0 782
// instance variables:

char_type* TextData; // data block of the text file; released with delete [] in the destructor
char_type* P; // current point in text to parse
char_type* TextBegin; // start of text to parse (behind a byte order mark, if any)
unsigned int TextSize; // size of text to parse in characters, not bytes

EXML_NODE CurrentNodeType; // type of the currently parsed node
ETEXT_FORMAT SourceFormat; // source format of the xml file
ETEXT_FORMAT TargetFormat; // output format of this parser

core::string<char_type> NodeName; // name of the current node; also holds the contents of text, comment and CDATA nodes
core::string<char_type> EmptyString; // empty string to be returned by getAttributeValueSafe()

bool IsEmptyElement; // is the currently parsed node empty?

core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()

core::array<SAttribute> Attributes; // attributes of current element
nuclear@0 802
nuclear@0 803 }; // end CXMLReaderImpl
nuclear@0 804
nuclear@0 805
nuclear@0 806 } // end namespace
nuclear@0 807 } // end namespace
nuclear@0 808
nuclear@0 809 #endif