vrshoot

view libs/assimp/irrXML/CXMLReaderImpl.h @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
line source
1 // Copyright (C) 2002-2005 Nikolaus Gebhardt
2 // This file is part of the "Irrlicht Engine" and the "irrXML" project.
3 // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
5 #ifndef __ICXML_READER_IMPL_H_INCLUDED__
6 #define __ICXML_READER_IMPL_H_INCLUDED__
8 #include "irrXML.h"
9 #include "irrString.h"
10 #include "irrArray.h"
12 using namespace Assimp;
// Debug trace helper: prints a plain string in _DEBUG builds and expands to
// nothing otherwise.
#ifdef _DEBUG
// The message is passed as an argument to a fixed "%s" format so that a '%'
// inside the message is never interpreted as a conversion specifier.
#define IRR_DEBUGPRINT(x) printf("%s", (x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
21 namespace irr
22 {
23 namespace io
24 {
27 //! implementation of the IrrXMLReader
28 template<class char_type, class superclass>
29 class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
30 {
31 public:
33 //! Constructor
34 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
35 : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
36 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)
37 {
38 if (!callback)
39 return;
41 storeTargetFormat();
43 // read whole xml file
45 readFile(callback);
47 // clean up
49 if (deleteCallBack)
50 delete callback;
52 // create list with special characters
54 createSpecialCharacterList();
56 // set pointer to text begin
57 P = TextBegin;
58 }
61 //! Destructor
62 virtual ~CXMLReaderImpl()
63 {
64 delete [] TextData;
65 }
68 //! Reads forward to the next xml node.
69 //! \return Returns false, if there was no further node.
70 virtual bool read()
71 {
72 // if not end reached, parse the node
73 if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)
74 {
75 parseCurrentNode();
76 return true;
77 }
79 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
80 return false;
81 }
84 //! Returns the type of the current XML node.
85 virtual EXML_NODE getNodeType() const
86 {
87 return CurrentNodeType;
88 }
91 //! Returns attribute count of the current XML node.
92 virtual int getAttributeCount() const
93 {
94 return Attributes.size();
95 }
98 //! Returns name of an attribute.
99 virtual const char_type* getAttributeName(int idx) const
100 {
101 if (idx < 0 || idx >= (int)Attributes.size())
102 return 0;
104 return Attributes[idx].Name.c_str();
105 }
108 //! Returns the value of an attribute.
109 virtual const char_type* getAttributeValue(int idx) const
110 {
111 if (idx < 0 || idx >= (int)Attributes.size())
112 return 0;
114 return Attributes[idx].Value.c_str();
115 }
118 //! Returns the value of an attribute.
119 virtual const char_type* getAttributeValue(const char_type* name) const
120 {
121 const SAttribute* attr = getAttributeByName(name);
122 if (!attr)
123 return 0;
125 return attr->Value.c_str();
126 }
129 //! Returns the value of an attribute
130 virtual const char_type* getAttributeValueSafe(const char_type* name) const
131 {
132 const SAttribute* attr = getAttributeByName(name);
133 if (!attr)
134 return EmptyString.c_str();
136 return attr->Value.c_str();
137 }
141 //! Returns the value of an attribute as integer.
142 int getAttributeValueAsInt(const char_type* name) const
143 {
144 return (int)getAttributeValueAsFloat(name);
145 }
148 //! Returns the value of an attribute as integer.
149 int getAttributeValueAsInt(int idx) const
150 {
151 return (int)getAttributeValueAsFloat(idx);
152 }
155 //! Returns the value of an attribute as float.
156 float getAttributeValueAsFloat(const char_type* name) const
157 {
158 const SAttribute* attr = getAttributeByName(name);
159 if (!attr)
160 return 0;
162 core::stringc c = attr->Value.c_str();
163 return fast_atof(c.c_str());
164 }
167 //! Returns the value of an attribute as float.
168 float getAttributeValueAsFloat(int idx) const
169 {
170 const char_type* attrvalue = getAttributeValue(idx);
171 if (!attrvalue)
172 return 0;
174 core::stringc c = attrvalue;
175 return fast_atof(c.c_str());
176 }
179 //! Returns the name of the current node.
180 virtual const char_type* getNodeName() const
181 {
182 return NodeName.c_str();
183 }
186 //! Returns data of the current node.
187 virtual const char_type* getNodeData() const
188 {
189 return NodeName.c_str();
190 }
193 //! Returns if an element is an empty element, like <foo />
194 virtual bool isEmptyElement() const
195 {
196 return IsEmptyElement;
197 }
199 //! Returns format of the source xml file.
200 virtual ETEXT_FORMAT getSourceFormat() const
201 {
202 return SourceFormat;
203 }
205 //! Returns format of the strings returned by the parser.
206 virtual ETEXT_FORMAT getParserFormat() const
207 {
208 return TargetFormat;
209 }
211 private:
213 // Reads the current xml node
214 void parseCurrentNode()
215 {
216 char_type* start = P;
218 // more forward until '<' found
219 while(*P != L'<' && *P)
220 ++P;
222 if (!*P)
223 return;
225 if (P - start > 0)
226 {
227 // we found some text, store it
228 if (setText(start, P))
229 return;
230 }
232 ++P;
234 // based on current token, parse and report next element
235 switch(*P)
236 {
237 case L'/':
238 parseClosingXMLElement();
239 break;
240 case L'?':
241 ignoreDefinition();
242 break;
243 case L'!':
244 if (!parseCDATA())
245 parseComment();
246 break;
247 default:
248 parseOpeningXMLElement();
249 break;
250 }
251 }
254 //! sets the state that text was found. Returns true if set should be set
255 bool setText(char_type* start, char_type* end)
256 {
257 // check if text is more than 2 characters, and if not, check if there is
258 // only white space, so that this text won't be reported
259 if (end - start < 3)
260 {
261 char_type* p = start;
262 for(; p != end; ++p)
263 if (!isWhiteSpace(*p))
264 break;
266 if (p == end)
267 return false;
268 }
270 // set current text to the parsed text, and replace xml special characters
271 core::string<char_type> s(start, (int)(end - start));
272 NodeName = replaceSpecialCharacters(s);
274 // current XML node type is text
275 CurrentNodeType = EXN_TEXT;
277 return true;
278 }
282 //! ignores an xml definition like <?xml something />
283 void ignoreDefinition()
284 {
285 CurrentNodeType = EXN_UNKNOWN;
287 // move until end marked with '>' reached
288 while(*P != L'>')
289 ++P;
291 ++P;
292 }
295 //! parses a comment
296 void parseComment()
297 {
298 CurrentNodeType = EXN_COMMENT;
299 P += 1;
301 char_type *pCommentBegin = P;
303 int count = 1;
305 // move until end of comment reached
306 while(count)
307 {
308 if (*P == L'>')
309 --count;
310 else
311 if (*P == L'<')
312 ++count;
314 ++P;
315 }
317 P -= 3;
318 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
319 P += 3;
320 }
323 //! parses an opening xml element and reads attributes
324 void parseOpeningXMLElement()
325 {
326 CurrentNodeType = EXN_ELEMENT;
327 IsEmptyElement = false;
328 Attributes.clear();
330 // find name
331 const char_type* startName = P;
333 // find end of element
334 while(*P != L'>' && !isWhiteSpace(*P))
335 ++P;
337 const char_type* endName = P;
339 // find Attributes
340 while(*P != L'>')
341 {
342 if (isWhiteSpace(*P))
343 ++P;
344 else
345 {
346 if (*P != L'/')
347 {
348 // we've got an attribute
350 // read the attribute names
351 const char_type* attributeNameBegin = P;
353 while(!isWhiteSpace(*P) && *P != L'=')
354 ++P;
356 const char_type* attributeNameEnd = P;
357 ++P;
359 // read the attribute value
360 // check for quotes and single quotes, thx to murphy
361 while( (*P != L'\"') && (*P != L'\'') && *P)
362 ++P;
364 if (!*P) // malformatted xml file
365 return;
367 const char_type attributeQuoteChar = *P;
369 ++P;
370 const char_type* attributeValueBegin = P;
372 while(*P != attributeQuoteChar && *P)
373 ++P;
375 if (!*P) // malformatted xml file
376 return;
378 const char_type* attributeValueEnd = P;
379 ++P;
381 SAttribute attr;
382 attr.Name = core::string<char_type>(attributeNameBegin,
383 (int)(attributeNameEnd - attributeNameBegin));
385 core::string<char_type> s(attributeValueBegin,
386 (int)(attributeValueEnd - attributeValueBegin));
388 attr.Value = replaceSpecialCharacters(s);
389 Attributes.push_back(attr);
390 }
391 else
392 {
393 // tag is closed directly
394 ++P;
395 IsEmptyElement = true;
396 break;
397 }
398 }
399 }
401 // check if this tag is closing directly
402 if (endName > startName && *(endName-1) == L'/')
403 {
404 // directly closing tag
405 IsEmptyElement = true;
406 endName--;
407 }
409 NodeName = core::string<char_type>(startName, (int)(endName - startName));
411 ++P;
412 }
415 //! parses an closing xml tag
416 void parseClosingXMLElement()
417 {
418 CurrentNodeType = EXN_ELEMENT_END;
419 IsEmptyElement = false;
420 Attributes.clear();
422 ++P;
423 const char_type* pBeginClose = P;
425 while(*P != L'>')
426 ++P;
428 // remove trailing whitespace, if any
429 while( isspace( P[-1]))
430 --P;
432 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
433 ++P;
434 }
436 //! parses a possible CDATA section, returns false if begin was not a CDATA section
437 bool parseCDATA()
438 {
439 if (*(P+1) != L'[')
440 return false;
442 CurrentNodeType = EXN_CDATA;
444 // skip '<![CDATA['
445 int count=0;
446 while( *P && count<8 )
447 {
448 ++P;
449 ++count;
450 }
452 if (!*P)
453 return true;
455 char_type *cDataBegin = P;
456 char_type *cDataEnd = 0;
458 // find end of CDATA
459 while(*P && !cDataEnd)
460 {
461 if (*P == L'>' &&
462 (*(P-1) == L']') &&
463 (*(P-2) == L']'))
464 {
465 cDataEnd = P - 2;
466 }
468 ++P;
469 }
471 if ( cDataEnd )
472 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
473 else
474 NodeName = "";
476 return true;
477 }
480 // structure for storing attribute-name pairs
481 struct SAttribute
482 {
483 core::string<char_type> Name;
484 core::string<char_type> Value;
485 };
487 // finds a current attribute by name, returns 0 if not found
488 const SAttribute* getAttributeByName(const char_type* name) const
489 {
490 if (!name)
491 return 0;
493 core::string<char_type> n = name;
495 for (int i=0; i<(int)Attributes.size(); ++i)
496 if (Attributes[i].Name == n)
497 return &Attributes[i];
499 return 0;
500 }
502 // replaces xml special characters in a string and creates a new one
503 core::string<char_type> replaceSpecialCharacters(
504 core::string<char_type>& origstr)
505 {
506 int pos = origstr.findFirst(L'&');
507 int oldPos = 0;
509 if (pos == -1)
510 return origstr;
512 core::string<char_type> newstr;
514 while(pos != -1 && pos < origstr.size()-2)
515 {
516 // check if it is one of the special characters
518 int specialChar = -1;
519 for (int i=0; i<(int)SpecialCharacters.size(); ++i)
520 {
521 const char_type* p = &origstr.c_str()[pos]+1;
523 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
524 {
525 specialChar = i;
526 break;
527 }
528 }
530 if (specialChar != -1)
531 {
532 newstr.append(origstr.subString(oldPos, pos - oldPos));
533 newstr.append(SpecialCharacters[specialChar][0]);
534 pos += SpecialCharacters[specialChar].size();
535 }
536 else
537 {
538 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
539 pos += 1;
540 }
542 // find next &
543 oldPos = pos;
544 pos = origstr.findNext(L'&', pos);
545 }
547 if (oldPos < origstr.size()-1)
548 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
550 return newstr;
551 }
555 //! reads the xml file and converts it into the wanted character format.
556 bool readFile(IFileReadCallBack* callback)
557 {
558 int size = callback->getSize();
559 size += 4; // We need two terminating 0's at the end.
560 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
562 char* data8 = new char[size];
564 if (!callback->read(data8, size-4))
565 {
566 delete [] data8;
567 return false;
568 }
570 // add zeros at end
572 data8[size-1] = 0;
573 data8[size-2] = 0;
574 data8[size-3] = 0;
575 data8[size-4] = 0;
577 char16* data16 = reinterpret_cast<char16*>(data8);
578 char32* data32 = reinterpret_cast<char32*>(data8);
580 // now we need to convert the data to the desired target format
581 // based on the byte order mark.
583 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
584 const int UTF16_BE = 0xFFFE;
585 const int UTF16_LE = 0xFEFF;
586 const int UTF32_BE = 0xFFFE0000;
587 const int UTF32_LE = 0x0000FEFF;
589 // check source for all utf versions and convert to target data format
591 if (size >= 4 && data32[0] == (char32)UTF32_BE)
592 {
593 // UTF-32, big endian
594 SourceFormat = ETF_UTF32_BE;
595 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
596 }
597 else
598 if (size >= 4 && data32[0] == (char32)UTF32_LE)
599 {
600 // UTF-32, little endian
601 SourceFormat = ETF_UTF32_LE;
602 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
603 }
604 else
605 if (size >= 2 && data16[0] == UTF16_BE)
606 {
607 // UTF-16, big endian
608 SourceFormat = ETF_UTF16_BE;
609 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
610 }
611 else
612 if (size >= 2 && data16[0] == UTF16_LE)
613 {
614 // UTF-16, little endian
615 SourceFormat = ETF_UTF16_LE;
616 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
617 }
618 else
619 if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])
620 {
621 // UTF-8
622 SourceFormat = ETF_UTF8;
623 convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header
624 }
625 else
626 {
627 // ASCII
628 SourceFormat = ETF_ASCII;
629 convertTextData(data8, data8, size);
630 }
632 return true;
633 }
636 //! converts the text file into the desired format.
637 //! \param source: begin of the text (without byte order mark)
638 //! \param pointerToStore: pointer to text data block which can be
639 //! stored or deleted based on the nesessary conversion.
640 //! \param sizeWithoutHeader: Text size in characters without header
641 template<class src_char_type>
642 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
643 {
644 // convert little to big endian if necessary
645 if (sizeof(src_char_type) > 1 &&
646 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
647 convertToLittleEndian(source);
649 // check if conversion is necessary:
650 if (sizeof(src_char_type) == sizeof(char_type))
651 {
652 // no need to convert
653 TextBegin = (char_type*)source;
654 TextData = (char_type*)pointerToStore;
655 TextSize = sizeWithoutHeader;
656 }
657 else
658 {
659 // convert source into target data format.
660 // TODO: implement a real conversion. This one just
661 // copies bytes. This is a problem when there are
662 // unicode symbols using more than one character.
664 TextData = new char_type[sizeWithoutHeader];
666 // MSVC debugger complains here about loss of data ...
669 // FIXME - gcc complains about 'shift width larger than width of type'
670 // for T == unsigned long. Avoid it by messing around volatile ..
671 volatile unsigned int c = 3;
672 const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)<<c)) - 1));
673 for (int i=0; i<sizeWithoutHeader; ++i)
674 TextData[i] = char_type( source[i] & cc);
676 TextBegin = TextData;
677 TextSize = sizeWithoutHeader;
679 // delete original data because no longer needed
680 delete [] pointerToStore;
681 }
682 }
//! Reverses the byte order of every character in a zero terminated buffer.
//! \param t: buffer to convert in place. Characters of 4 bytes get all four
//! bytes reversed, everything else is handled as 16 bit.
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    if (sizeof(src_char_type) != 4)
    {
        // 16 bit: exchange high and low byte
        for (; *t; ++t)
            *t = (*t >> 8) | (*t << 8);

        return;
    }

    // 32 bit: mirror the four bytes
    for (; *t; ++t)
    {
        *t = ((*t & 0xff000000) >> 24) |
            ((*t & 0x00ff0000) >> 8) |
            ((*t & 0x0000ff00) << 8) |
            ((*t & 0x000000ff) << 24);
    }
}
713 //! returns if a format is little endian
714 inline bool isLittleEndian(ETEXT_FORMAT f)
715 {
716 return f == ETF_ASCII ||
717 f == ETF_UTF8 ||
718 f == ETF_UTF16_LE ||
719 f == ETF_UTF32_LE;
720 }
723 //! returns true if a character is whitespace
724 inline bool isWhiteSpace(char_type c)
725 {
726 return (c==' ' || c=='\t' || c=='\n' || c=='\r');
727 }
730 //! generates a list with xml special characters
731 void createSpecialCharacterList()
732 {
733 // list of strings containing special symbols,
734 // the first character is the special character,
735 // the following is the symbol string without trailing &.
737 SpecialCharacters.push_back("&amp;");
738 SpecialCharacters.push_back("<lt;");
739 SpecialCharacters.push_back(">gt;");
740 SpecialCharacters.push_back("\"quot;");
741 SpecialCharacters.push_back("'apos;");
743 }
746 //! compares the first n characters of the strings
747 bool equalsn(const char_type* str1, const char_type* str2, int len)
748 {
749 int i;
750 for(i=0; str1[i] && str2[i] && i < len; ++i)
751 if (str1[i] != str2[i])
752 return false;
754 // if one (or both) of the strings was smaller then they
755 // are only equal if they have the same lenght
756 return (i == len) || (str1[i] == 0 && str2[i] == 0);
757 }
760 //! stores the target text format
761 void storeTargetFormat()
762 {
763 // get target format. We could have done this using template specialization,
764 // but VisualStudio 6 don't like it and we want to support it.
766 switch(sizeof(char_type))
767 {
768 case 1:
769 TargetFormat = ETF_UTF8;
770 break;
771 case 2:
772 TargetFormat = ETF_UTF16_LE;
773 break;
774 case 4:
775 TargetFormat = ETF_UTF32_LE;
776 break;
777 default:
778 TargetFormat = ETF_ASCII; // should never happen.
779 }
780 }
783 // instance variables:
785 char_type* TextData; // data block of the text file
786 char_type* P; // current point in text to parse
787 char_type* TextBegin; // start of text to parse
788 unsigned int TextSize; // size of text to parse in characters, not bytes
790 EXML_NODE CurrentNodeType; // type of the currently parsed node
791 ETEXT_FORMAT SourceFormat; // source format of the xml file
792 ETEXT_FORMAT TargetFormat; // output format of this parser
794 core::string<char_type> NodeName; // name of the node currently in
795 core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods
797 bool IsEmptyElement; // is the currently parsed node empty?
799 core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()
801 core::array<SAttribute> Attributes; // attributes of current element
803 }; // end CXMLReaderImpl
806 } // end namespace
807 } // end namespace
809 #endif