vrshoot: b2f14e535253 libs/assimp/irrXML/CXMLReaderImpl.h

vrshoot

view libs/assimp/irrXML/CXMLReaderImpl.h @ 0:b2f14e535253

initial commit

author	John Tsiombikas <nuclear@member.fsf.org>
date	Sat, 01 Feb 2014 19:58:19 +0200
parents
children

line source

2 // This file is part of the "Irrlicht Engine" and the "irrXML" project.

3 // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h

5 #ifndef __ICXML_READER_IMPL_H_INCLUDED__

6 #define __ICXML_READER_IMPL_H_INCLUDED__

8 #include "irrXML.h"

9 #include "irrString.h"

10 #include "irrArray.h"

12 using namespace Assimp;

14 #ifdef _DEBUG

15 #define IRR_DEBUGPRINT(x) printf((x));

16 #else // _DEBUG

17 #define IRR_DEBUGPRINT(x)

18 #endif // _DEBUG

21 namespace irr

22 {

23 namespace io

24 {

27 //! implementation of the IrrXMLReader

28 template<class char_type, class superclass>

29 class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>

30 {

31 public:

33 //! Constructor

34 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)

35 : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),

36 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)

37 {

38 if (!callback)

39 return;

41 storeTargetFormat();

43 // read whole xml file

45 readFile(callback);

47 // clean up

49 if (deleteCallBack)

50 delete callback;

52 // create list with special characters

54 createSpecialCharacterList();

56 // set pointer to text begin

57 P = TextBegin;

58 }

61 //! Destructor

62 virtual ~CXMLReaderImpl()

63 {

64 delete [] TextData;

65 }

68 //! Reads forward to the next xml node.

69 //! \return Returns false, if there was no further node.

70 virtual bool read()

71 {

72 // if not end reached, parse the node

73 if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)

74 {

75 parseCurrentNode();

76 return true;

77 }

79 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;

80 return false;

81 }

84 //! Returns the type of the current XML node.

85 virtual EXML_NODE getNodeType() const

86 {

87 return CurrentNodeType;

88 }

91 //! Returns attribute count of the current XML node.

92 virtual int getAttributeCount() const

93 {

94 return Attributes.size();

95 }

98 //! Returns name of an attribute.

99 virtual const char_type* getAttributeName(int idx) const

100 {

101 if (idx < 0 || idx >= (int)Attributes.size())

102 return 0;

103

104 return Attributes[idx].Name.c_str();

105 }

106

107

108 //! Returns the value of an attribute.

109 virtual const char_type* getAttributeValue(int idx) const

110 {

111 if (idx < 0 || idx >= (int)Attributes.size())

112 return 0;

113

114 return Attributes[idx].Value.c_str();

115 }

116

117

118 //! Returns the value of an attribute.

119 virtual const char_type* getAttributeValue(const char_type* name) const

120 {

121 const SAttribute* attr = getAttributeByName(name);

122 if (!attr)

123 return 0;

124

125 return attr->Value.c_str();

126 }

127

128

129 //! Returns the value of an attribute

130 virtual const char_type* getAttributeValueSafe(const char_type* name) const

131 {

132 const SAttribute* attr = getAttributeByName(name);

133 if (!attr)

134 return EmptyString.c_str();

135

136 return attr->Value.c_str();

137 }

138

139

140

141 //! Returns the value of an attribute as integer.

142 int getAttributeValueAsInt(const char_type* name) const

143 {

144 return (int)getAttributeValueAsFloat(name);

145 }

146

147

148 //! Returns the value of an attribute as integer.

149 int getAttributeValueAsInt(int idx) const

150 {

151 return (int)getAttributeValueAsFloat(idx);

152 }

153

154

155 //! Returns the value of an attribute as float.

156 float getAttributeValueAsFloat(const char_type* name) const

157 {

158 const SAttribute* attr = getAttributeByName(name);

159 if (!attr)

160 return 0;

161

162 core::stringc c = attr->Value.c_str();

163 return fast_atof(c.c_str());

164 }

165

166

167 //! Returns the value of an attribute as float.

168 float getAttributeValueAsFloat(int idx) const

169 {

170 const char_type* attrvalue = getAttributeValue(idx);

171 if (!attrvalue)

172 return 0;

173

174 core::stringc c = attrvalue;

175 return fast_atof(c.c_str());

176 }

177

178

179 //! Returns the name of the current node.

180 virtual const char_type* getNodeName() const

181 {

182 return NodeName.c_str();

183 }

184

185

186 //! Returns data of the current node.

187 virtual const char_type* getNodeData() const

188 {

189 return NodeName.c_str();

190 }

191

192

193 //! Returns if an element is an empty element, like <foo />

194 virtual bool isEmptyElement() const

195 {

196 return IsEmptyElement;

197 }

198

199 //! Returns format of the source xml file.

200 virtual ETEXT_FORMAT getSourceFormat() const

201 {

202 return SourceFormat;

203 }

204

205 //! Returns format of the strings returned by the parser.

206 virtual ETEXT_FORMAT getParserFormat() const

207 {

208 return TargetFormat;

209 }

210

211 private:

212

213 // Reads the current xml node

214 void parseCurrentNode()

215 {

216 char_type* start = P;

217

218 // more forward until '<' found

219 while(*P != L'<' && *P)

220 ++P;

221

222 if (!*P)

223 return;

224

225 if (P - start > 0)

226 {

227 // we found some text, store it

228 if (setText(start, P))

229 return;

230 }

231

232 ++P;

233

234 // based on current token, parse and report next element

235 switch(*P)

236 {

237 case L'/':

238 parseClosingXMLElement();

239 break;

240 case L'?':

241 ignoreDefinition();

242 break;

243 case L'!':

244 if (!parseCDATA())

245 parseComment();

246 break;

247 default:

248 parseOpeningXMLElement();

249 break;

250 }

251 }

252

253

254 //! sets the state that text was found. Returns true if set should be set

255 bool setText(char_type* start, char_type* end)

256 {

257 // check if text is more than 2 characters, and if not, check if there is

258 // only white space, so that this text won't be reported

259 if (end - start < 3)

260 {

261 char_type* p = start;

262 for(; p != end; ++p)

263 if (!isWhiteSpace(*p))

264 break;

265

266 if (p == end)

267 return false;

268 }

269

270 // set current text to the parsed text, and replace xml special characters

271 core::string<char_type> s(start, (int)(end - start));

272 NodeName = replaceSpecialCharacters(s);

273

274 // current XML node type is text

275 CurrentNodeType = EXN_TEXT;

276

277 return true;

278 }

279

280

281

282 //! ignores an xml definition like <?xml something />

283 void ignoreDefinition()

284 {

285 CurrentNodeType = EXN_UNKNOWN;

286

287 // move until end marked with '>' reached

288 while(*P != L'>')

289 ++P;

290

291 ++P;

292 }

293

294

295 //! parses a comment

296 void parseComment()

297 {

298 CurrentNodeType = EXN_COMMENT;

299 P += 1;

300

301 char_type *pCommentBegin = P;

302

303 int count = 1;

304

305 // move until end of comment reached

306 while(count)

307 {

308 if (*P == L'>')

309 --count;

310 else

311 if (*P == L'<')

312 ++count;

313

314 ++P;

315 }

316

317 P -= 3;

318 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));

319 P += 3;

320 }

321

322

323 //! parses an opening xml element and reads attributes

324 void parseOpeningXMLElement()

325 {

326 CurrentNodeType = EXN_ELEMENT;

327 IsEmptyElement = false;

328 Attributes.clear();

329

330 // find name

331 const char_type* startName = P;

332

333 // find end of element

334 while(*P != L'>' && !isWhiteSpace(*P))

335 ++P;

336

337 const char_type* endName = P;

338

339 // find Attributes

340 while(*P != L'>')

341 {

342 if (isWhiteSpace(*P))

343 ++P;

344 else

345 {

346 if (*P != L'/')

347 {

348 // we've got an attribute

349

350 // read the attribute names

351 const char_type* attributeNameBegin = P;

352

353 while(!isWhiteSpace(*P) && *P != L'=')

354 ++P;

355

356 const char_type* attributeNameEnd = P;

357 ++P;

358

359 // read the attribute value

360 // check for quotes and single quotes, thx to murphy

361 while( (*P != L'\"') && (*P != L'\'') && *P)

362 ++P;

363

364 if (!*P) // malformatted xml file

365 return;

366

367 const char_type attributeQuoteChar = *P;

368

369 ++P;

370 const char_type* attributeValueBegin = P;

371

372 while(*P != attributeQuoteChar && *P)

373 ++P;

374

375 if (!*P) // malformatted xml file

376 return;

377

378 const char_type* attributeValueEnd = P;

379 ++P;

380

381 SAttribute attr;

382 attr.Name = core::string<char_type>(attributeNameBegin,

383 (int)(attributeNameEnd - attributeNameBegin));

384

385 core::string<char_type> s(attributeValueBegin,

386 (int)(attributeValueEnd - attributeValueBegin));

387

388 attr.Value = replaceSpecialCharacters(s);

389 Attributes.push_back(attr);

390 }

391 else

392 {

393 // tag is closed directly

394 ++P;

395 IsEmptyElement = true;

396 break;

397 }

398 }

399 }

400

401 // check if this tag is closing directly

402 if (endName > startName && *(endName-1) == L'/')

403 {

404 // directly closing tag

405 IsEmptyElement = true;

406 endName--;

407 }

408

409 NodeName = core::string<char_type>(startName, (int)(endName - startName));

410

411 ++P;

412 }

413

414

415 //! parses an closing xml tag

416 void parseClosingXMLElement()

417 {

418 CurrentNodeType = EXN_ELEMENT_END;

419 IsEmptyElement = false;

420 Attributes.clear();

421

422 ++P;

423 const char_type* pBeginClose = P;

424

425 while(*P != L'>')

426 ++P;

427

428 // remove trailing whitespace, if any

429 while( isspace( P[-1]))

430 --P;

431

432 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));

433 ++P;

434 }

435

436 //! parses a possible CDATA section, returns false if begin was not a CDATA section

437 bool parseCDATA()

438 {

439 if (*(P+1) != L'[')

440 return false;

441

442 CurrentNodeType = EXN_CDATA;

443

444 // skip '<![CDATA['

445 int count=0;

446 while( *P && count<8 )

447 {

448 ++P;

449 ++count;

450 }

451

452 if (!*P)

453 return true;

454

455 char_type *cDataBegin = P;

456 char_type *cDataEnd = 0;

457

458 // find end of CDATA

459 while(*P && !cDataEnd)

460 {

461 if (*P == L'>' &&

462 (*(P-1) == L']') &&

463 (*(P-2) == L']'))

464 {

465 cDataEnd = P - 2;

466 }

467

468 ++P;

469 }

470

471 if ( cDataEnd )

472 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));

473 else

474 NodeName = "";

475

476 return true;

477 }

478

479

480 // structure for storing attribute-name pairs

481 struct SAttribute

482 {

483 core::string<char_type> Name;

484 core::string<char_type> Value;

485 };

486

487 // finds a current attribute by name, returns 0 if not found

488 const SAttribute* getAttributeByName(const char_type* name) const

489 {

490 if (!name)

491 return 0;

492

493 core::string<char_type> n = name;

494

495 for (int i=0; i<(int)Attributes.size(); ++i)

496 if (Attributes[i].Name == n)

497 return &Attributes[i];

498

499 return 0;

500 }

501

502 // replaces xml special characters in a string and creates a new one

503 core::string<char_type> replaceSpecialCharacters(

504 core::string<char_type>& origstr)

505 {

506 int pos = origstr.findFirst(L'&');

507 int oldPos = 0;

508

509 if (pos == -1)

510 return origstr;

511

512 core::string<char_type> newstr;

513

514 while(pos != -1 && pos < origstr.size()-2)

515 {

516 // check if it is one of the special characters

517

518 int specialChar = -1;

519 for (int i=0; i<(int)SpecialCharacters.size(); ++i)

520 {

521 const char_type* p = &origstr.c_str()[pos]+1;

522

523 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))

524 {

525 specialChar = i;

526 break;

527 }

528 }

529

530 if (specialChar != -1)

531 {

532 newstr.append(origstr.subString(oldPos, pos - oldPos));

533 newstr.append(SpecialCharacters[specialChar][0]);

534 pos += SpecialCharacters[specialChar].size();

535 }

536 else

537 {

538 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));

539 pos += 1;

540 }

541

542 // find next &

543 oldPos = pos;

544 pos = origstr.findNext(L'&', pos);

545 }

546

547 if (oldPos < origstr.size()-1)

548 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));

549

550 return newstr;

551 }

552

553

554

555 //! reads the xml file and converts it into the wanted character format.

556 bool readFile(IFileReadCallBack* callback)

557 {

558 int size = callback->getSize();

559 size += 4; // We need two terminating 0's at the end.

560 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.

561

562 char* data8 = new char[size];

563

564 if (!callback->read(data8, size-4))

565 {

566 delete [] data8;

567 return false;

568 }

569

570 // add zeros at end

571

572 data8[size-1] = 0;

573 data8[size-2] = 0;

574 data8[size-3] = 0;

575 data8[size-4] = 0;

576

577 char16* data16 = reinterpret_cast<char16*>(data8);

578 char32* data32 = reinterpret_cast<char32*>(data8);

579

580 // now we need to convert the data to the desired target format

581 // based on the byte order mark.

582

583 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;

584 const int UTF16_BE = 0xFFFE;

585 const int UTF16_LE = 0xFEFF;

586 const int UTF32_BE = 0xFFFE0000;

587 const int UTF32_LE = 0x0000FEFF;

588

589 // check source for all utf versions and convert to target data format

590

591 if (size >= 4 && data32[0] == (char32)UTF32_BE)

592 {

593 // UTF-32, big endian

594 SourceFormat = ETF_UTF32_BE;

595 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header

596 }

597 else

598 if (size >= 4 && data32[0] == (char32)UTF32_LE)

599 {

600 // UTF-32, little endian

601 SourceFormat = ETF_UTF32_LE;

602 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header

603 }

604 else

605 if (size >= 2 && data16[0] == UTF16_BE)

606 {

607 // UTF-16, big endian

608 SourceFormat = ETF_UTF16_BE;

609 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header

610 }

611 else

612 if (size >= 2 && data16[0] == UTF16_LE)

613 {

614 // UTF-16, little endian

615 SourceFormat = ETF_UTF16_LE;

616 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header

617 }

618 else

619 if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])

620 {

621 // UTF-8

622 SourceFormat = ETF_UTF8;

623 convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header

624 }

625 else

626 {

627 // ASCII

628 SourceFormat = ETF_ASCII;

629 convertTextData(data8, data8, size);

630 }

631

632 return true;

633 }

634

635

636 //! converts the text file into the desired format.

637 //! \param source: begin of the text (without byte order mark)

638 //! \param pointerToStore: pointer to text data block which can be

639 //! stored or deleted based on the nesessary conversion.

640 //! \param sizeWithoutHeader: Text size in characters without header

641 template<class src_char_type>

642 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)

643 {

644 // convert little to big endian if necessary

645 if (sizeof(src_char_type) > 1 &&

646 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))

647 convertToLittleEndian(source);

648

649 // check if conversion is necessary:

650 if (sizeof(src_char_type) == sizeof(char_type))

651 {

652 // no need to convert

653 TextBegin = (char_type*)source;

654 TextData = (char_type*)pointerToStore;

655 TextSize = sizeWithoutHeader;

656 }

657 else

658 {

659 // convert source into target data format.

660 // TODO: implement a real conversion. This one just

661 // copies bytes. This is a problem when there are

662 // unicode symbols using more than one character.

663

664 TextData = new char_type[sizeWithoutHeader];

665

666 // MSVC debugger complains here about loss of data ...

667

668

669 // FIXME - gcc complains about 'shift width larger than width of type'

670 // for T == unsigned long. Avoid it by messing around volatile ..

671 volatile unsigned int c = 3;

672 const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)<<c)) - 1));

673 for (int i=0; i<sizeWithoutHeader; ++i)

674 TextData[i] = char_type( source[i] & cc);

675

676 TextBegin = TextData;

677 TextSize = sizeWithoutHeader;

678

679 // delete original data because no longer needed

680 delete [] pointerToStore;

681 }

682 }

683

//! Reverses the byte order of every character in a zero terminated buffer.
//! Characters are treated as 32 bit values when sizeof(src_char_type) is 4
//! and as 16 bit values otherwise. The buffer is changed in place and
//! processing stops at the first character that is zero.
//! \param t: first character of the buffer.
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    const bool is32Bit = (sizeof(src_char_type) == 4);

    for (; *t; ++t)
    {
        if (is32Bit)
        {
            // 32 bit: mirror the four bytes
            *t = ((*t & 0xff000000) >> 24) |
                ((*t & 0x00ff0000) >> 8) |
                ((*t & 0x0000ff00) << 8) |
                ((*t & 0x000000ff) << 24);
        }
        else
        {
            // 16 bit: exchange the two bytes
            *t = (*t >> 8) | (*t << 8);
        }
    }
}

712

713 //! returns if a format is little endian

714 inline bool isLittleEndian(ETEXT_FORMAT f)

715 {

716 return f == ETF_ASCII ||

717 f == ETF_UTF8 ||

718 f == ETF_UTF16_LE ||

719 f == ETF_UTF32_LE;

720 }

721

722

723 //! returns true if a character is whitespace

724 inline bool isWhiteSpace(char_type c)

725 {

726 return (c==' ' || c=='\t' || c=='\n' || c=='\r');

727 }

728

729

730 //! generates a list with xml special characters

731 void createSpecialCharacterList()

732 {

733 // list of strings containing special symbols,

734 // the first character is the special character,

735 // the following is the symbol string without trailing &.

736

737 SpecialCharacters.push_back("&");

738 SpecialCharacters.push_back("<lt;");

739 SpecialCharacters.push_back(">gt;");

740 SpecialCharacters.push_back("\"quot;");

741 SpecialCharacters.push_back("'apos;");

742

743 }

744

745

746 //! compares the first n characters of the strings

747 bool equalsn(const char_type* str1, const char_type* str2, int len)

748 {

749 int i;

750 for(i=0; str1[i] && str2[i] && i < len; ++i)

751 if (str1[i] != str2[i])

752 return false;

753

754 // if one (or both) of the strings was smaller then they

755 // are only equal if they have the same lenght

756 return (i == len) || (str1[i] == 0 && str2[i] == 0);

757 }

758

759

760 //! stores the target text format

761 void storeTargetFormat()

762 {

763 // get target format. We could have done this using template specialization,

764 // but VisualStudio 6 don't like it and we want to support it.

765

766 switch(sizeof(char_type))

767 {

768 case 1:

769 TargetFormat = ETF_UTF8;

770 break;

771 case 2:

772 TargetFormat = ETF_UTF16_LE;

773 break;

774 case 4:

775 TargetFormat = ETF_UTF32_LE;

776 break;

777 default:

778 TargetFormat = ETF_ASCII; // should never happen.

779 }

780 }

781

782

783 // instance variables:

784

785 char_type* TextData; // data block of the text file

786 char_type* P; // current point in text to parse

787 char_type* TextBegin; // start of text to parse

788 unsigned int TextSize; // size of text to parse in characters, not bytes

789

790 EXML_NODE CurrentNodeType; // type of the currently parsed node

791 ETEXT_FORMAT SourceFormat; // source format of the xml file

792 ETEXT_FORMAT TargetFormat; // output format of this parser

793

794 core::string<char_type> NodeName; // name of the node currently in

795 core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods

796

797 bool IsEmptyElement; // is the currently parsed node empty?

798

799 core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()

800

801 core::array<SAttribute> Attributes; // attributes of current element

802

803 }; // end CXMLReaderImpl

804

805

806 } // end namespace

807 } // end namespace

808

809 #endif