nuclear@0: // Copyright (C) 2002-2005 Nikolaus Gebhardt nuclear@0: // This file is part of the "Irrlicht Engine" and the "irrXML" project. nuclear@0: // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h nuclear@0: nuclear@0: #ifndef __ICXML_READER_IMPL_H_INCLUDED__ nuclear@0: #define __ICXML_READER_IMPL_H_INCLUDED__ nuclear@0: nuclear@0: #include "irrXML.h" nuclear@0: #include "irrString.h" nuclear@0: #include "irrArray.h" nuclear@0: nuclear@0: using namespace Assimp; nuclear@0: nuclear@0: #ifdef _DEBUG nuclear@0: #define IRR_DEBUGPRINT(x) printf((x)); nuclear@0: #else // _DEBUG nuclear@0: #define IRR_DEBUGPRINT(x) nuclear@0: #endif // _DEBUG nuclear@0: nuclear@0: nuclear@0: namespace irr nuclear@0: { nuclear@0: namespace io nuclear@0: { nuclear@0: nuclear@0: nuclear@0: //! implementation of the IrrXMLReader nuclear@0: template nuclear@0: class CXMLReaderImpl : public IIrrXMLReader nuclear@0: { nuclear@0: public: nuclear@0: nuclear@0: //! Constructor nuclear@0: CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true) nuclear@0: : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE), nuclear@0: SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII) nuclear@0: { nuclear@0: if (!callback) nuclear@0: return; nuclear@0: nuclear@0: storeTargetFormat(); nuclear@0: nuclear@0: // read whole xml file nuclear@0: nuclear@0: readFile(callback); nuclear@0: nuclear@0: // clean up nuclear@0: nuclear@0: if (deleteCallBack) nuclear@0: delete callback; nuclear@0: nuclear@0: // create list with special characters nuclear@0: nuclear@0: createSpecialCharacterList(); nuclear@0: nuclear@0: // set pointer to text begin nuclear@0: P = TextBegin; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Destructor nuclear@0: virtual ~CXMLReaderImpl() nuclear@0: { nuclear@0: delete [] TextData; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Reads forward to the next xml node. nuclear@0: //! \return Returns false, if there was no further node. nuclear@0: virtual bool read() nuclear@0: { nuclear@0: // if not end reached, parse the node nuclear@0: if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0) nuclear@0: { nuclear@0: parseCurrentNode(); nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX; nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the type of the current XML node. nuclear@0: virtual EXML_NODE getNodeType() const nuclear@0: { nuclear@0: return CurrentNodeType; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns attribute count of the current XML node. nuclear@0: virtual int getAttributeCount() const nuclear@0: { nuclear@0: return Attributes.size(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns name of an attribute. nuclear@0: virtual const char_type* getAttributeName(int idx) const nuclear@0: { nuclear@0: if (idx < 0 || idx >= (int)Attributes.size()) nuclear@0: return 0; nuclear@0: nuclear@0: return Attributes[idx].Name.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute. nuclear@0: virtual const char_type* getAttributeValue(int idx) const nuclear@0: { nuclear@0: if (idx < 0 || idx >= (int)Attributes.size()) nuclear@0: return 0; nuclear@0: nuclear@0: return Attributes[idx].Value.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute. nuclear@0: virtual const char_type* getAttributeValue(const char_type* name) const nuclear@0: { nuclear@0: const SAttribute* attr = getAttributeByName(name); nuclear@0: if (!attr) nuclear@0: return 0; nuclear@0: nuclear@0: return attr->Value.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute nuclear@0: virtual const char_type* getAttributeValueSafe(const char_type* name) const nuclear@0: { nuclear@0: const SAttribute* attr = getAttributeByName(name); nuclear@0: if (!attr) nuclear@0: return EmptyString.c_str(); nuclear@0: nuclear@0: return attr->Value.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute as integer. nuclear@0: int getAttributeValueAsInt(const char_type* name) const nuclear@0: { nuclear@0: return (int)getAttributeValueAsFloat(name); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute as integer. nuclear@0: int getAttributeValueAsInt(int idx) const nuclear@0: { nuclear@0: return (int)getAttributeValueAsFloat(idx); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute as float. nuclear@0: float getAttributeValueAsFloat(const char_type* name) const nuclear@0: { nuclear@0: const SAttribute* attr = getAttributeByName(name); nuclear@0: if (!attr) nuclear@0: return 0; nuclear@0: nuclear@0: core::stringc c = attr->Value.c_str(); nuclear@0: return fast_atof(c.c_str()); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the value of an attribute as float. nuclear@0: float getAttributeValueAsFloat(int idx) const nuclear@0: { nuclear@0: const char_type* attrvalue = getAttributeValue(idx); nuclear@0: if (!attrvalue) nuclear@0: return 0; nuclear@0: nuclear@0: core::stringc c = attrvalue; nuclear@0: return fast_atof(c.c_str()); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns the name of the current node. nuclear@0: virtual const char_type* getNodeName() const nuclear@0: { nuclear@0: return NodeName.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns data of the current node. nuclear@0: virtual const char_type* getNodeData() const nuclear@0: { nuclear@0: return NodeName.c_str(); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! Returns if an element is an empty element, like nuclear@0: virtual bool isEmptyElement() const nuclear@0: { nuclear@0: return IsEmptyElement; nuclear@0: } nuclear@0: nuclear@0: //! Returns format of the source xml file. nuclear@0: virtual ETEXT_FORMAT getSourceFormat() const nuclear@0: { nuclear@0: return SourceFormat; nuclear@0: } nuclear@0: nuclear@0: //! Returns format of the strings returned by the parser. nuclear@0: virtual ETEXT_FORMAT getParserFormat() const nuclear@0: { nuclear@0: return TargetFormat; nuclear@0: } nuclear@0: nuclear@0: private: nuclear@0: nuclear@0: // Reads the current xml node nuclear@0: void parseCurrentNode() nuclear@0: { nuclear@0: char_type* start = P; nuclear@0: nuclear@0: // more forward until '<' found nuclear@0: while(*P != L'<' && *P) nuclear@0: ++P; nuclear@0: nuclear@0: if (!*P) nuclear@0: return; nuclear@0: nuclear@0: if (P - start > 0) nuclear@0: { nuclear@0: // we found some text, store it nuclear@0: if (setText(start, P)) nuclear@0: return; nuclear@0: } nuclear@0: nuclear@0: ++P; nuclear@0: nuclear@0: // based on current token, parse and report next element nuclear@0: switch(*P) nuclear@0: { nuclear@0: case L'/': nuclear@0: parseClosingXMLElement(); nuclear@0: break; nuclear@0: case L'?': nuclear@0: ignoreDefinition(); nuclear@0: break; nuclear@0: case L'!': nuclear@0: if (!parseCDATA()) nuclear@0: parseComment(); nuclear@0: break; nuclear@0: default: nuclear@0: parseOpeningXMLElement(); nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! sets the state that text was found. Returns true if set should be set nuclear@0: bool setText(char_type* start, char_type* end) nuclear@0: { nuclear@0: // check if text is more than 2 characters, and if not, check if there is nuclear@0: // only white space, so that this text won't be reported nuclear@0: if (end - start < 3) nuclear@0: { nuclear@0: char_type* p = start; nuclear@0: for(; p != end; ++p) nuclear@0: if (!isWhiteSpace(*p)) nuclear@0: break; nuclear@0: nuclear@0: if (p == end) nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: // set current text to the parsed text, and replace xml special characters nuclear@0: core::string s(start, (int)(end - start)); nuclear@0: NodeName = replaceSpecialCharacters(s); nuclear@0: nuclear@0: // current XML node type is text nuclear@0: CurrentNodeType = EXN_TEXT; nuclear@0: nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: nuclear@0: //! ignores an xml definition like nuclear@0: void ignoreDefinition() nuclear@0: { nuclear@0: CurrentNodeType = EXN_UNKNOWN; nuclear@0: nuclear@0: // move until end marked with '>' reached nuclear@0: while(*P != L'>') nuclear@0: ++P; nuclear@0: nuclear@0: ++P; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! parses a comment nuclear@0: void parseComment() nuclear@0: { nuclear@0: CurrentNodeType = EXN_COMMENT; nuclear@0: P += 1; nuclear@0: nuclear@0: char_type *pCommentBegin = P; nuclear@0: nuclear@0: int count = 1; nuclear@0: nuclear@0: // move until end of comment reached nuclear@0: while(count) nuclear@0: { nuclear@0: if (*P == L'>') nuclear@0: --count; nuclear@0: else nuclear@0: if (*P == L'<') nuclear@0: ++count; nuclear@0: nuclear@0: ++P; nuclear@0: } nuclear@0: nuclear@0: P -= 3; nuclear@0: NodeName = core::string(pCommentBegin+2, (int)(P - pCommentBegin-2)); nuclear@0: P += 3; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! parses an opening xml element and reads attributes nuclear@0: void parseOpeningXMLElement() nuclear@0: { nuclear@0: CurrentNodeType = EXN_ELEMENT; nuclear@0: IsEmptyElement = false; nuclear@0: Attributes.clear(); nuclear@0: nuclear@0: // find name nuclear@0: const char_type* startName = P; nuclear@0: nuclear@0: // find end of element nuclear@0: while(*P != L'>' && !isWhiteSpace(*P)) nuclear@0: ++P; nuclear@0: nuclear@0: const char_type* endName = P; nuclear@0: nuclear@0: // find Attributes nuclear@0: while(*P != L'>') nuclear@0: { nuclear@0: if (isWhiteSpace(*P)) nuclear@0: ++P; nuclear@0: else nuclear@0: { nuclear@0: if (*P != L'/') nuclear@0: { nuclear@0: // we've got an attribute nuclear@0: nuclear@0: // read the attribute names nuclear@0: const char_type* attributeNameBegin = P; nuclear@0: nuclear@0: while(!isWhiteSpace(*P) && *P != L'=') nuclear@0: ++P; nuclear@0: nuclear@0: const char_type* attributeNameEnd = P; nuclear@0: ++P; nuclear@0: nuclear@0: // read the attribute value nuclear@0: // check for quotes and single quotes, thx to murphy nuclear@0: while( (*P != L'\"') && (*P != L'\'') && *P) nuclear@0: ++P; nuclear@0: nuclear@0: if (!*P) // malformatted xml file nuclear@0: return; nuclear@0: nuclear@0: const char_type attributeQuoteChar = *P; nuclear@0: nuclear@0: ++P; nuclear@0: const char_type* attributeValueBegin = P; nuclear@0: nuclear@0: while(*P != attributeQuoteChar && *P) nuclear@0: ++P; nuclear@0: nuclear@0: if (!*P) // malformatted xml file nuclear@0: return; nuclear@0: nuclear@0: const char_type* attributeValueEnd = P; nuclear@0: ++P; nuclear@0: nuclear@0: SAttribute attr; nuclear@0: attr.Name = core::string(attributeNameBegin, nuclear@0: (int)(attributeNameEnd - attributeNameBegin)); nuclear@0: nuclear@0: core::string s(attributeValueBegin, nuclear@0: (int)(attributeValueEnd - attributeValueBegin)); nuclear@0: nuclear@0: attr.Value = replaceSpecialCharacters(s); nuclear@0: Attributes.push_back(attr); nuclear@0: } nuclear@0: else nuclear@0: { nuclear@0: // tag is closed directly nuclear@0: ++P; nuclear@0: IsEmptyElement = true; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: // check if this tag is closing directly nuclear@0: if (endName > startName && *(endName-1) == L'/') nuclear@0: { nuclear@0: // directly closing tag nuclear@0: IsEmptyElement = true; nuclear@0: endName--; nuclear@0: } nuclear@0: nuclear@0: NodeName = core::string(startName, (int)(endName - startName)); nuclear@0: nuclear@0: ++P; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! parses an closing xml tag nuclear@0: void parseClosingXMLElement() nuclear@0: { nuclear@0: CurrentNodeType = EXN_ELEMENT_END; nuclear@0: IsEmptyElement = false; nuclear@0: Attributes.clear(); nuclear@0: nuclear@0: ++P; nuclear@0: const char_type* pBeginClose = P; nuclear@0: nuclear@0: while(*P != L'>') nuclear@0: ++P; nuclear@0: nuclear@0: // remove trailing whitespace, if any nuclear@0: while( isspace( P[-1])) nuclear@0: --P; nuclear@0: nuclear@0: NodeName = core::string(pBeginClose, (int)(P - pBeginClose)); nuclear@0: ++P; nuclear@0: } nuclear@0: nuclear@0: //! parses a possible CDATA section, returns false if begin was not a CDATA section nuclear@0: bool parseCDATA() nuclear@0: { nuclear@0: if (*(P+1) != L'[') nuclear@0: return false; nuclear@0: nuclear@0: CurrentNodeType = EXN_CDATA; nuclear@0: nuclear@0: // skip '' && nuclear@0: (*(P-1) == L']') && nuclear@0: (*(P-2) == L']')) nuclear@0: { nuclear@0: cDataEnd = P - 2; nuclear@0: } nuclear@0: nuclear@0: ++P; nuclear@0: } nuclear@0: nuclear@0: if ( cDataEnd ) nuclear@0: NodeName = core::string(cDataBegin, (int)(cDataEnd - cDataBegin)); nuclear@0: else nuclear@0: NodeName = ""; nuclear@0: nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // structure for storing attribute-name pairs nuclear@0: struct SAttribute nuclear@0: { nuclear@0: core::string Name; nuclear@0: core::string Value; nuclear@0: }; nuclear@0: nuclear@0: // finds a current attribute by name, returns 0 if not found nuclear@0: const SAttribute* getAttributeByName(const char_type* name) const nuclear@0: { nuclear@0: if (!name) nuclear@0: return 0; nuclear@0: nuclear@0: core::string n = name; nuclear@0: nuclear@0: for (int i=0; i<(int)Attributes.size(); ++i) nuclear@0: if (Attributes[i].Name == n) nuclear@0: return &Attributes[i]; nuclear@0: nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: // replaces xml special characters in a string and creates a new one nuclear@0: core::string replaceSpecialCharacters( nuclear@0: core::string& origstr) nuclear@0: { nuclear@0: int pos = origstr.findFirst(L'&'); nuclear@0: int oldPos = 0; nuclear@0: nuclear@0: if (pos == -1) nuclear@0: return origstr; nuclear@0: nuclear@0: core::string newstr; nuclear@0: nuclear@0: while(pos != -1 && pos < origstr.size()-2) nuclear@0: { nuclear@0: // check if it is one of the special characters nuclear@0: nuclear@0: int specialChar = -1; nuclear@0: for (int i=0; i<(int)SpecialCharacters.size(); ++i) nuclear@0: { nuclear@0: const char_type* p = &origstr.c_str()[pos]+1; nuclear@0: nuclear@0: if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1)) nuclear@0: { nuclear@0: specialChar = i; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if (specialChar != -1) nuclear@0: { nuclear@0: newstr.append(origstr.subString(oldPos, pos - oldPos)); nuclear@0: newstr.append(SpecialCharacters[specialChar][0]); nuclear@0: pos += SpecialCharacters[specialChar].size(); nuclear@0: } nuclear@0: else nuclear@0: { nuclear@0: newstr.append(origstr.subString(oldPos, pos - oldPos + 1)); nuclear@0: pos += 1; nuclear@0: } nuclear@0: nuclear@0: // find next & nuclear@0: oldPos = pos; nuclear@0: pos = origstr.findNext(L'&', pos); nuclear@0: } nuclear@0: nuclear@0: if (oldPos < origstr.size()-1) nuclear@0: newstr.append(origstr.subString(oldPos, origstr.size()-oldPos)); nuclear@0: nuclear@0: return newstr; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: nuclear@0: //! reads the xml file and converts it into the wanted character format. nuclear@0: bool readFile(IFileReadCallBack* callback) nuclear@0: { nuclear@0: int size = callback->getSize(); nuclear@0: size += 4; // We need two terminating 0's at the end. nuclear@0: // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4. nuclear@0: nuclear@0: char* data8 = new char[size]; nuclear@0: nuclear@0: if (!callback->read(data8, size-4)) nuclear@0: { nuclear@0: delete [] data8; nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: // add zeros at end nuclear@0: nuclear@0: data8[size-1] = 0; nuclear@0: data8[size-2] = 0; nuclear@0: data8[size-3] = 0; nuclear@0: data8[size-4] = 0; nuclear@0: nuclear@0: char16* data16 = reinterpret_cast(data8); nuclear@0: char32* data32 = reinterpret_cast(data8); nuclear@0: nuclear@0: // now we need to convert the data to the desired target format nuclear@0: // based on the byte order mark. nuclear@0: nuclear@0: const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF; nuclear@0: const int UTF16_BE = 0xFFFE; nuclear@0: const int UTF16_LE = 0xFEFF; nuclear@0: const int UTF32_BE = 0xFFFE0000; nuclear@0: const int UTF32_LE = 0x0000FEFF; nuclear@0: nuclear@0: // check source for all utf versions and convert to target data format nuclear@0: nuclear@0: if (size >= 4 && data32[0] == (char32)UTF32_BE) nuclear@0: { nuclear@0: // UTF-32, big endian nuclear@0: SourceFormat = ETF_UTF32_BE; nuclear@0: convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header nuclear@0: } nuclear@0: else nuclear@0: if (size >= 4 && data32[0] == (char32)UTF32_LE) nuclear@0: { nuclear@0: // UTF-32, little endian nuclear@0: SourceFormat = ETF_UTF32_LE; nuclear@0: convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header nuclear@0: } nuclear@0: else nuclear@0: if (size >= 2 && data16[0] == UTF16_BE) nuclear@0: { nuclear@0: // UTF-16, big endian nuclear@0: SourceFormat = ETF_UTF16_BE; nuclear@0: convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header nuclear@0: } nuclear@0: else nuclear@0: if (size >= 2 && data16[0] == UTF16_LE) nuclear@0: { nuclear@0: // UTF-16, little endian nuclear@0: SourceFormat = ETF_UTF16_LE; nuclear@0: convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header nuclear@0: } nuclear@0: else nuclear@0: if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2]) nuclear@0: { nuclear@0: // UTF-8 nuclear@0: SourceFormat = ETF_UTF8; nuclear@0: convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header nuclear@0: } nuclear@0: else nuclear@0: { nuclear@0: // ASCII nuclear@0: SourceFormat = ETF_ASCII; nuclear@0: convertTextData(data8, data8, size); nuclear@0: } nuclear@0: nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! converts the text file into the desired format. nuclear@0: //! \param source: begin of the text (without byte order mark) nuclear@0: //! \param pointerToStore: pointer to text data block which can be nuclear@0: //! stored or deleted based on the nesessary conversion. nuclear@0: //! \param sizeWithoutHeader: Text size in characters without header nuclear@0: template nuclear@0: void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader) nuclear@0: { nuclear@0: // convert little to big endian if necessary nuclear@0: if (sizeof(src_char_type) > 1 && nuclear@0: isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat)) nuclear@0: convertToLittleEndian(source); nuclear@0: nuclear@0: // check if conversion is necessary: nuclear@0: if (sizeof(src_char_type) == sizeof(char_type)) nuclear@0: { nuclear@0: // no need to convert nuclear@0: TextBegin = (char_type*)source; nuclear@0: TextData = (char_type*)pointerToStore; nuclear@0: TextSize = sizeWithoutHeader; nuclear@0: } nuclear@0: else nuclear@0: { nuclear@0: // convert source into target data format. nuclear@0: // TODO: implement a real conversion. This one just nuclear@0: // copies bytes. This is a problem when there are nuclear@0: // unicode symbols using more than one character. nuclear@0: nuclear@0: TextData = new char_type[sizeWithoutHeader]; nuclear@0: nuclear@0: // MSVC debugger complains here about loss of data ... nuclear@0: nuclear@0: nuclear@0: // FIXME - gcc complains about 'shift width larger than width of type' nuclear@0: // for T == unsigned long. Avoid it by messing around volatile .. nuclear@0: volatile unsigned int c = 3; nuclear@0: const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)< nuclear@0: void convertToLittleEndian(src_char_type* t) nuclear@0: { nuclear@0: if (sizeof(src_char_type) == 4) nuclear@0: { nuclear@0: // 32 bit nuclear@0: nuclear@0: while(*t) nuclear@0: { nuclear@0: *t = ((*t & 0xff000000) >> 24) | nuclear@0: ((*t & 0x00ff0000) >> 8) | nuclear@0: ((*t & 0x0000ff00) << 8) | nuclear@0: ((*t & 0x000000ff) << 24); nuclear@0: ++t; nuclear@0: } nuclear@0: } nuclear@0: else nuclear@0: { nuclear@0: // 16 bit nuclear@0: nuclear@0: while(*t) nuclear@0: { nuclear@0: *t = (*t >> 8) | (*t << 8); nuclear@0: ++t; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: //! returns if a format is little endian nuclear@0: inline bool isLittleEndian(ETEXT_FORMAT f) nuclear@0: { nuclear@0: return f == ETF_ASCII || nuclear@0: f == ETF_UTF8 || nuclear@0: f == ETF_UTF16_LE || nuclear@0: f == ETF_UTF32_LE; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! returns true if a character is whitespace nuclear@0: inline bool isWhiteSpace(char_type c) nuclear@0: { nuclear@0: return (c==' ' || c=='\t' || c=='\n' || c=='\r'); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! generates a list with xml special characters nuclear@0: void createSpecialCharacterList() nuclear@0: { nuclear@0: // list of strings containing special symbols, nuclear@0: // the first character is the special character, nuclear@0: // the following is the symbol string without trailing &. nuclear@0: nuclear@0: SpecialCharacters.push_back("&"); nuclear@0: SpecialCharacters.push_back("gt;"); nuclear@0: SpecialCharacters.push_back("\"quot;"); nuclear@0: SpecialCharacters.push_back("'apos;"); nuclear@0: nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! compares the first n characters of the strings nuclear@0: bool equalsn(const char_type* str1, const char_type* str2, int len) nuclear@0: { nuclear@0: int i; nuclear@0: for(i=0; str1[i] && str2[i] && i < len; ++i) nuclear@0: if (str1[i] != str2[i]) nuclear@0: return false; nuclear@0: nuclear@0: // if one (or both) of the strings was smaller then they nuclear@0: // are only equal if they have the same lenght nuclear@0: return (i == len) || (str1[i] == 0 && str2[i] == 0); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //! stores the target text format nuclear@0: void storeTargetFormat() nuclear@0: { nuclear@0: // get target format. We could have done this using template specialization, nuclear@0: // but VisualStudio 6 don't like it and we want to support it. nuclear@0: nuclear@0: switch(sizeof(char_type)) nuclear@0: { nuclear@0: case 1: nuclear@0: TargetFormat = ETF_UTF8; nuclear@0: break; nuclear@0: case 2: nuclear@0: TargetFormat = ETF_UTF16_LE; nuclear@0: break; nuclear@0: case 4: nuclear@0: TargetFormat = ETF_UTF32_LE; nuclear@0: break; nuclear@0: default: nuclear@0: TargetFormat = ETF_ASCII; // should never happen. nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // instance variables: nuclear@0: nuclear@0: char_type* TextData; // data block of the text file nuclear@0: char_type* P; // current point in text to parse nuclear@0: char_type* TextBegin; // start of text to parse nuclear@0: unsigned int TextSize; // size of text to parse in characters, not bytes nuclear@0: nuclear@0: EXML_NODE CurrentNodeType; // type of the currently parsed node nuclear@0: ETEXT_FORMAT SourceFormat; // source format of the xml file nuclear@0: ETEXT_FORMAT TargetFormat; // output format of this parser nuclear@0: nuclear@0: core::string NodeName; // name of the node currently in nuclear@0: core::string EmptyString; // empty string to be returned by getSafe() methods nuclear@0: nuclear@0: bool IsEmptyElement; // is the currently parsed node empty? nuclear@0: nuclear@0: core::array< core::string > SpecialCharacters; // see createSpecialCharacterList() nuclear@0: nuclear@0: core::array Attributes; // attributes of current element nuclear@0: nuclear@0: }; // end CXMLReaderImpl nuclear@0: nuclear@0: nuclear@0: } // end namespace nuclear@0: } // end namespace nuclear@0: nuclear@0: #endif