vrshoot

diff libs/assimp/irrXML/CXMLReaderImpl.h @ 0:b2f14e535253
initial commit
author: John Tsiombikas <nuclear@member.fsf.org>
date: Sat, 01 Feb 2014 19:58:19 +0200
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libs/assimp/irrXML/CXMLReaderImpl.h	Sat Feb 01 19:58:19 2014 +0200
     1.3 @@ -0,0 +1,809 @@
     1.4 +// Copyright (C) 2002-2005 Nikolaus Gebhardt
     1.5 +// This file is part of the "Irrlicht Engine" and the "irrXML" project.
     1.6 +// For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
     1.7 +
     1.8 +#ifndef __ICXML_READER_IMPL_H_INCLUDED__
     1.9 +#define __ICXML_READER_IMPL_H_INCLUDED__
    1.10 +
    1.11 +#include "irrXML.h"
    1.12 +#include "irrString.h"
    1.13 +#include "irrArray.h"
    1.14 +
    1.15 +using namespace Assimp;
    1.16 +
    1.17 +#ifdef _DEBUG
    1.18 +#define IRR_DEBUGPRINT(x) printf((x));
    1.19 +#else // _DEBUG 
    1.20 +#define IRR_DEBUGPRINT(x)
    1.21 +#endif // _DEBUG
    1.22 +
    1.23 +
    1.24 +namespace irr
    1.25 +{
    1.26 +namespace io
    1.27 +{
    1.28 +
    1.29 +
    1.30 +//! implementation of the IrrXMLReader
    1.31 +template<class char_type, class superclass>
    1.32 +class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
    1.33 +{
    1.34 +public:
    1.35 +
    1.36 +	//! Constructor
    1.37 +	CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
    1.38 +		: TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
    1.39 +		SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)
    1.40 +	{
    1.41 +		if (!callback)
    1.42 +			return;
    1.43 +
    1.44 +		storeTargetFormat();
    1.45 +
    1.46 +		// read whole xml file
    1.47 +
    1.48 +		readFile(callback);
    1.49 +		
    1.50 +		// clean up
    1.51 +
    1.52 +		if (deleteCallBack)
    1.53 +			delete callback;
    1.54 +
    1.55 +		// create list with special characters
    1.56 +
    1.57 +		createSpecialCharacterList();
    1.58 +
    1.59 +		// set pointer to text begin
    1.60 +		P = TextBegin;
    1.61 +	}
    1.62 +    	
    1.63 +
    1.64 +	//! Destructor
    1.65 +	virtual ~CXMLReaderImpl()
    1.66 +	{
    1.67 +		delete [] TextData;
    1.68 +	}
    1.69 +
    1.70 +
    1.71 +	//! Reads forward to the next xml node. 
    1.72 +	//! \return Returns false, if there was no further node. 
    1.73 +	virtual bool read()
    1.74 +	{
    1.75 +		// if not end reached, parse the node
    1.76 +		if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)
    1.77 +		{
    1.78 +			parseCurrentNode();
    1.79 +			return true;
    1.80 +		}
    1.81 +
    1.82 +		_IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
    1.83 +		return false;
    1.84 +	}
    1.85 +
    1.86 +
    1.87 +	//! Returns the type of the current XML node.
    1.88 +	virtual EXML_NODE getNodeType() const
    1.89 +	{
    1.90 +		return CurrentNodeType;
    1.91 +	}
    1.92 +
    1.93 +
    1.94 +	//! Returns attribute count of the current XML node.
    1.95 +	virtual int getAttributeCount() const
    1.96 +	{
    1.97 +		return Attributes.size();
    1.98 +	}
    1.99 +
   1.100 +
   1.101 +	//! Returns name of an attribute.
   1.102 +	virtual const char_type* getAttributeName(int idx) const
   1.103 +	{
   1.104 +		if (idx < 0 || idx >= (int)Attributes.size())
   1.105 +			return 0;
   1.106 +
   1.107 +		return Attributes[idx].Name.c_str();
   1.108 +	}
   1.109 +
   1.110 +
   1.111 +	//! Returns the value of an attribute. 
   1.112 +	virtual const char_type* getAttributeValue(int idx) const
   1.113 +	{
   1.114 +		if (idx < 0 || idx >= (int)Attributes.size())
   1.115 +			return 0;
   1.116 +
   1.117 +		return Attributes[idx].Value.c_str();
   1.118 +	}
   1.119 +
   1.120 +
   1.121 +	//! Returns the value of an attribute. 
   1.122 +	virtual const char_type* getAttributeValue(const char_type* name) const
   1.123 +	{
   1.124 +		const SAttribute* attr = getAttributeByName(name);
   1.125 +		if (!attr)
   1.126 +			return 0;
   1.127 +
   1.128 +		return attr->Value.c_str();
   1.129 +	}
   1.130 +
   1.131 +
   1.132 +	//! Returns the value of an attribute
   1.133 +	virtual const char_type* getAttributeValueSafe(const char_type* name) const
   1.134 +	{
   1.135 +		const SAttribute* attr = getAttributeByName(name);
   1.136 +		if (!attr)
   1.137 +			return EmptyString.c_str();
   1.138 +
   1.139 +		return attr->Value.c_str();
   1.140 +	}
   1.141 +
   1.142 +
   1.143 +
   1.144 +	//! Returns the value of an attribute as integer. 
   1.145 +	int getAttributeValueAsInt(const char_type* name) const
   1.146 +	{
   1.147 +		return (int)getAttributeValueAsFloat(name);
   1.148 +	}
   1.149 +
   1.150 +
   1.151 +	//! Returns the value of an attribute as integer. 
   1.152 +	int getAttributeValueAsInt(int idx) const
   1.153 +	{
   1.154 +		return (int)getAttributeValueAsFloat(idx);
   1.155 +	}
   1.156 +
   1.157 +
   1.158 +	//! Returns the value of an attribute as float. 
   1.159 +	float getAttributeValueAsFloat(const char_type* name) const
   1.160 +	{
   1.161 +		const SAttribute* attr = getAttributeByName(name);
   1.162 +		if (!attr)
   1.163 +			return 0;
   1.164 +
   1.165 +		core::stringc c = attr->Value.c_str();
   1.166 +		return fast_atof(c.c_str());
   1.167 +	}
   1.168 +
   1.169 +
   1.170 +	//! Returns the value of an attribute as float. 
   1.171 +	float getAttributeValueAsFloat(int idx) const
   1.172 +	{
   1.173 +		const char_type* attrvalue = getAttributeValue(idx);
   1.174 +		if (!attrvalue)
   1.175 +			return 0;
   1.176 +
   1.177 +		core::stringc c = attrvalue;
   1.178 +		return fast_atof(c.c_str());
   1.179 +	}
   1.180 +
   1.181 +
   1.182 +	//! Returns the name of the current node.
   1.183 +	virtual const char_type* getNodeName() const
   1.184 +	{
   1.185 +		return NodeName.c_str();
   1.186 +	}
   1.187 +
   1.188 +
   1.189 +	//! Returns data of the current node.
   1.190 +	virtual const char_type* getNodeData() const
   1.191 +	{
   1.192 +		return NodeName.c_str();
   1.193 +	}
   1.194 +
   1.195 +
   1.196 +	//! Returns if an element is an empty element, like <foo />
   1.197 +	virtual bool isEmptyElement() const
   1.198 +	{
   1.199 +		return IsEmptyElement;
   1.200 +	}
   1.201 +
   1.202 +	//! Returns format of the source xml file.
   1.203 +	virtual ETEXT_FORMAT getSourceFormat() const
   1.204 +	{
   1.205 +		return SourceFormat;
   1.206 +	}
   1.207 +
   1.208 +	//! Returns format of the strings returned by the parser.
   1.209 +	virtual ETEXT_FORMAT getParserFormat() const
   1.210 +	{
   1.211 +		return TargetFormat;
   1.212 +	}
   1.213 +
   1.214 +private:
   1.215 +
   1.216 +	// Reads the current xml node
   1.217 +	void parseCurrentNode()
   1.218 +	{
   1.219 +		char_type* start = P;
   1.220 +
   1.221 +		// more forward until '<' found
   1.222 +		while(*P != L'<' && *P)
   1.223 +			++P;
   1.224 +
   1.225 +		if (!*P)
   1.226 +			return;
   1.227 +
   1.228 +		if (P - start > 0)
   1.229 +		{
   1.230 +			// we found some text, store it
   1.231 +			if (setText(start, P))
   1.232 +				return;
   1.233 +		}
   1.234 +
   1.235 +		++P;
   1.236 +
   1.237 +		// based on current token, parse and report next element
   1.238 +		switch(*P)
   1.239 +		{
   1.240 +		case L'/':
   1.241 +			parseClosingXMLElement(); 
   1.242 +			break;
   1.243 +		case L'?':
   1.244 +			ignoreDefinition();	
   1.245 +			break;
   1.246 +		case L'!':
   1.247 +			if (!parseCDATA())
   1.248 +				parseComment();	
   1.249 +			break;
   1.250 +		default:
   1.251 +			parseOpeningXMLElement();
   1.252 +			break;
   1.253 +		}
   1.254 +	}
   1.255 +
   1.256 +
   1.257 +	//! sets the state that text was found. Returns true if set should be set
   1.258 +	bool setText(char_type* start, char_type* end)
   1.259 +	{
   1.260 +		// check if text is more than 2 characters, and if not, check if there is 
   1.261 +		// only white space, so that this text won't be reported
   1.262 +		if (end - start < 3)
   1.263 +		{
   1.264 +			char_type* p = start;
   1.265 +			for(; p != end; ++p)
   1.266 +				if (!isWhiteSpace(*p))
   1.267 +					break;
   1.268 +
   1.269 +			if (p == end)
   1.270 +				return false;
   1.271 +		}
   1.272 +
   1.273 +		// set current text to the parsed text, and replace xml special characters
   1.274 +		core::string<char_type> s(start, (int)(end - start));
   1.275 +		NodeName = replaceSpecialCharacters(s);
   1.276 +
   1.277 +		// current XML node type is text
   1.278 +		CurrentNodeType = EXN_TEXT;
   1.279 +
   1.280 +		return true;
   1.281 +	}
   1.282 +
   1.283 +
   1.284 +
   1.285 +	//! ignores an xml definition like <?xml something />
   1.286 +	void ignoreDefinition()
   1.287 +	{
   1.288 +		CurrentNodeType = EXN_UNKNOWN;
   1.289 +
   1.290 +		// move until end marked with '>' reached
   1.291 +		while(*P != L'>')
   1.292 +			++P;
   1.293 +
   1.294 +		++P;
   1.295 +	}
   1.296 +
   1.297 +
   1.298 +	//! parses a comment
   1.299 +	void parseComment()
   1.300 +	{
   1.301 +		CurrentNodeType = EXN_COMMENT;
   1.302 +		P += 1;
   1.303 +
   1.304 +		char_type *pCommentBegin = P;
   1.305 +
   1.306 +		int count = 1;
   1.307 +
   1.308 +		// move until end of comment reached
   1.309 +		while(count)
   1.310 +		{
   1.311 +			if (*P == L'>')
   1.312 +				--count;
   1.313 +			else
   1.314 +			if (*P == L'<')
   1.315 +				++count;
   1.316 +
   1.317 +			++P;
   1.318 +		}
   1.319 +
   1.320 +		P -= 3;
   1.321 +		NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
   1.322 +		P += 3;
   1.323 +	}
   1.324 +
   1.325 +
   1.326 +	//! parses an opening xml element and reads attributes
   1.327 +	void parseOpeningXMLElement()
   1.328 +	{
   1.329 +		CurrentNodeType = EXN_ELEMENT;
   1.330 +		IsEmptyElement = false;
   1.331 +		Attributes.clear();
   1.332 +
   1.333 +		// find name
   1.334 +		const char_type* startName = P;
   1.335 +
   1.336 +		// find end of element
   1.337 +		while(*P != L'>' && !isWhiteSpace(*P))
   1.338 +			++P;
   1.339 +
   1.340 +		const char_type* endName = P;
   1.341 +
   1.342 +		// find Attributes
   1.343 +		while(*P != L'>')
   1.344 +		{
   1.345 +			if (isWhiteSpace(*P))
   1.346 +				++P;
   1.347 +			else
   1.348 +			{
   1.349 +				if (*P != L'/')
   1.350 +				{
   1.351 +					// we've got an attribute
   1.352 +
   1.353 +					// read the attribute names
   1.354 +					const char_type* attributeNameBegin = P;
   1.355 +
   1.356 +					while(!isWhiteSpace(*P) && *P != L'=')
   1.357 +						++P;
   1.358 +
   1.359 +					const char_type* attributeNameEnd = P;
   1.360 +					++P;
   1.361 +
   1.362 +					// read the attribute value
   1.363 +					// check for quotes and single quotes, thx to murphy
   1.364 +					while( (*P != L'\"') && (*P != L'\'') && *P) 
   1.365 +						++P;
   1.366 +
   1.367 +					if (!*P) // malformatted xml file
   1.368 +						return;
   1.369 +
   1.370 +					const char_type attributeQuoteChar = *P;
   1.371 +
   1.372 +					++P;
   1.373 +					const char_type* attributeValueBegin = P;
   1.374 +					
   1.375 +					while(*P != attributeQuoteChar && *P)
   1.376 +						++P;
   1.377 +
   1.378 +					if (!*P) // malformatted xml file
   1.379 +						return;
   1.380 +
   1.381 +					const char_type* attributeValueEnd = P;
   1.382 +					++P;
   1.383 +
   1.384 +					SAttribute attr;
   1.385 +					attr.Name = core::string<char_type>(attributeNameBegin, 
   1.386 +						(int)(attributeNameEnd - attributeNameBegin));
   1.387 +
   1.388 +					core::string<char_type> s(attributeValueBegin, 
   1.389 +						(int)(attributeValueEnd - attributeValueBegin));
   1.390 +
   1.391 +					attr.Value = replaceSpecialCharacters(s);
   1.392 +					Attributes.push_back(attr);
   1.393 +				}
   1.394 +				else
   1.395 +				{
   1.396 +					// tag is closed directly
   1.397 +					++P;
   1.398 +					IsEmptyElement = true;
   1.399 +					break;
   1.400 +				}
   1.401 +			}
   1.402 +		}
   1.403 +
   1.404 +		// check if this tag is closing directly
   1.405 +		if (endName > startName && *(endName-1) == L'/')
   1.406 +		{
   1.407 +			// directly closing tag
   1.408 +			IsEmptyElement = true;
   1.409 +			endName--;
   1.410 +		}
   1.411 +		
   1.412 +		NodeName = core::string<char_type>(startName, (int)(endName - startName));
   1.413 +
   1.414 +		++P;
   1.415 +	}
   1.416 +
   1.417 +
   1.418 +	//! parses an closing xml tag
   1.419 +	void parseClosingXMLElement()
   1.420 +	{
   1.421 +		CurrentNodeType = EXN_ELEMENT_END;
   1.422 +		IsEmptyElement = false;
   1.423 +		Attributes.clear();
   1.424 +
   1.425 +		++P;
   1.426 +		const char_type* pBeginClose = P;
   1.427 +
   1.428 +		while(*P != L'>')
   1.429 +			++P;
   1.430 +
   1.431 +    // remove trailing whitespace, if any
   1.432 +    while( isspace( P[-1]))
   1.433 +      --P;
   1.434 +
   1.435 +		NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
   1.436 +		++P;
   1.437 +	}
   1.438 +
   1.439 +	//! parses a possible CDATA section, returns false if begin was not a CDATA section
   1.440 +	bool parseCDATA()
   1.441 +	{
   1.442 +		if (*(P+1) != L'[')
   1.443 +			return false;
   1.444 +
   1.445 +		CurrentNodeType = EXN_CDATA;
   1.446 +
   1.447 +		// skip '<![CDATA['
   1.448 +		int count=0;
   1.449 +		while( *P && count<8 )
   1.450 +		{
   1.451 +			++P;
   1.452 +			++count;
   1.453 +		}
   1.454 +
   1.455 +		if (!*P)
   1.456 +			return true;
   1.457 +
   1.458 +		char_type *cDataBegin = P;
   1.459 +		char_type *cDataEnd = 0;
   1.460 +
   1.461 +		// find end of CDATA
   1.462 +		while(*P && !cDataEnd)
   1.463 +		{
   1.464 +			if (*P == L'>' && 
   1.465 +			   (*(P-1) == L']') &&
   1.466 +			   (*(P-2) == L']'))
   1.467 +			{
   1.468 +				cDataEnd = P - 2;
   1.469 +			}
   1.470 +
   1.471 +			++P;
   1.472 +		}
   1.473 +
   1.474 +		if ( cDataEnd )
   1.475 +			NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
   1.476 +		else
   1.477 +			NodeName = "";
   1.478 +
   1.479 +		return true;
   1.480 +	}
   1.481 +
   1.482 +
	// structure for storing one attribute as a name/value pair
	struct SAttribute
	{
		core::string<char_type> Name;  // attribute name as written in the tag
		core::string<char_type> Value; // attribute value with xml special characters replaced
	};
   1.489 +
   1.490 +	// finds a current attribute by name, returns 0 if not found
   1.491 +	const SAttribute* getAttributeByName(const char_type* name) const
   1.492 +	{
   1.493 +		if (!name)
   1.494 +			return 0;
   1.495 +
   1.496 +		core::string<char_type> n = name;
   1.497 +
   1.498 +		for (int i=0; i<(int)Attributes.size(); ++i)
   1.499 +			if (Attributes[i].Name == n)
   1.500 +				return &Attributes[i];
   1.501 +
   1.502 +		return 0;
   1.503 +	}
   1.504 +
   1.505 +	// replaces xml special characters in a string and creates a new one
   1.506 +	core::string<char_type> replaceSpecialCharacters(
   1.507 +		core::string<char_type>& origstr)
   1.508 +	{
   1.509 +		int pos = origstr.findFirst(L'&');
   1.510 +		int oldPos = 0;
   1.511 +
   1.512 +		if (pos == -1)
   1.513 +			return origstr;
   1.514 +
   1.515 +		core::string<char_type> newstr;
   1.516 +
   1.517 +		while(pos != -1 && pos < origstr.size()-2)
   1.518 +		{
   1.519 +			// check if it is one of the special characters
   1.520 +
   1.521 +			int specialChar = -1;
   1.522 +			for (int i=0; i<(int)SpecialCharacters.size(); ++i)
   1.523 +			{
   1.524 +				const char_type* p = &origstr.c_str()[pos]+1;
   1.525 +
   1.526 +				if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
   1.527 +				{
   1.528 +					specialChar = i;
   1.529 +					break;
   1.530 +				}
   1.531 +			}
   1.532 +
   1.533 +			if (specialChar != -1)
   1.534 +			{
   1.535 +				newstr.append(origstr.subString(oldPos, pos - oldPos));
   1.536 +				newstr.append(SpecialCharacters[specialChar][0]);
   1.537 +				pos += SpecialCharacters[specialChar].size();
   1.538 +			}
   1.539 +			else
   1.540 +			{
   1.541 +				newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
   1.542 +				pos += 1;
   1.543 +			}
   1.544 +
   1.545 +			// find next &
   1.546 +			oldPos = pos;
   1.547 +			pos = origstr.findNext(L'&', pos);		
   1.548 +		}
   1.549 +
   1.550 +		if (oldPos < origstr.size()-1)
   1.551 +			newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
   1.552 +
   1.553 +		return newstr;
   1.554 +	}
   1.555 +
   1.556 +
   1.557 +
   1.558 +	//! reads the xml file and converts it into the wanted character format.
   1.559 +	bool readFile(IFileReadCallBack* callback)
   1.560 +	{
   1.561 +		int size = callback->getSize();		
   1.562 +		size += 4; // We need two terminating 0's at the end.
   1.563 +		           // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
   1.564 +
   1.565 +		char* data8 = new char[size];
   1.566 +
   1.567 +		if (!callback->read(data8, size-4))
   1.568 +		{
   1.569 +			delete [] data8;
   1.570 +			return false;
   1.571 +		}
   1.572 +
   1.573 +		// add zeros at end
   1.574 +
   1.575 +		data8[size-1] = 0;
   1.576 +		data8[size-2] = 0;
   1.577 +		data8[size-3] = 0;
   1.578 +		data8[size-4] = 0;
   1.579 +
   1.580 +		char16* data16 = reinterpret_cast<char16*>(data8);
   1.581 +		char32* data32 = reinterpret_cast<char32*>(data8);	
   1.582 +
   1.583 +		// now we need to convert the data to the desired target format
   1.584 +		// based on the byte order mark.
   1.585 +
   1.586 +		const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
   1.587 +		const int UTF16_BE = 0xFFFE;
   1.588 +		const int UTF16_LE = 0xFEFF;
   1.589 +		const int UTF32_BE = 0xFFFE0000;
   1.590 +		const int UTF32_LE = 0x0000FEFF;
   1.591 +
   1.592 +		// check source for all utf versions and convert to target data format
   1.593 +		
   1.594 +		if (size >= 4 && data32[0] == (char32)UTF32_BE)
   1.595 +		{
   1.596 +			// UTF-32, big endian
   1.597 +			SourceFormat = ETF_UTF32_BE;
   1.598 +			convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
   1.599 +		}
   1.600 +		else
   1.601 +		if (size >= 4 && data32[0] == (char32)UTF32_LE)
   1.602 +		{
   1.603 +			// UTF-32, little endian
   1.604 +			SourceFormat = ETF_UTF32_LE;
   1.605 +			convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
   1.606 +		}
   1.607 +		else
   1.608 +		if (size >= 2 && data16[0] == UTF16_BE)
   1.609 +		{
   1.610 +			// UTF-16, big endian
   1.611 +			SourceFormat = ETF_UTF16_BE;
   1.612 +			convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
   1.613 +		}
   1.614 +		else
   1.615 +		if (size >= 2 && data16[0] == UTF16_LE)
   1.616 +		{
   1.617 +			// UTF-16, little endian
   1.618 +			SourceFormat = ETF_UTF16_LE;
   1.619 +			convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
   1.620 +		}
   1.621 +		else
   1.622 +		if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])
   1.623 +		{
   1.624 +			// UTF-8
   1.625 +			SourceFormat = ETF_UTF8;
   1.626 +			convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header
   1.627 +		}
   1.628 +		else
   1.629 +		{
   1.630 +			// ASCII
   1.631 +			SourceFormat = ETF_ASCII;
   1.632 +			convertTextData(data8, data8, size);
   1.633 +		}
   1.634 +
   1.635 +		return true;
   1.636 +	}
   1.637 +
   1.638 +
   1.639 +	//! converts the text file into the desired format.
   1.640 +	//! \param source: begin of the text (without byte order mark)
   1.641 +	//! \param pointerToStore: pointer to text data block which can be
   1.642 +	//! stored or deleted based on the nesessary conversion.
   1.643 +	//! \param sizeWithoutHeader: Text size in characters without header
   1.644 +	template<class src_char_type>
   1.645 +	void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
   1.646 +	{
   1.647 +		// convert little to big endian if necessary
   1.648 +		if (sizeof(src_char_type) > 1 && 
   1.649 +			isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
   1.650 +			convertToLittleEndian(source);
   1.651 +
   1.652 +		// check if conversion is necessary:
   1.653 +		if (sizeof(src_char_type) == sizeof(char_type))
   1.654 +		{
   1.655 +			// no need to convert
   1.656 +			TextBegin = (char_type*)source;
   1.657 +			TextData = (char_type*)pointerToStore;
   1.658 +			TextSize = sizeWithoutHeader;
   1.659 +		}
   1.660 +		else
   1.661 +		{
   1.662 +			// convert source into target data format. 
   1.663 +			// TODO: implement a real conversion. This one just 
   1.664 +			// copies bytes. This is a problem when there are 
   1.665 +			// unicode symbols using more than one character.
   1.666 +
   1.667 +			TextData = new char_type[sizeWithoutHeader];
   1.668 +
   1.669 +			// MSVC debugger complains here about loss of data ...
   1.670 +
   1.671 +
   1.672 +			// FIXME - gcc complains about 'shift width larger than width of type'
   1.673 +			// for T == unsigned long. Avoid it by messing around volatile ..
   1.674 +			volatile unsigned int c = 3;
   1.675 +			const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)<<c)) - 1));
   1.676 +			for (int i=0; i<sizeWithoutHeader; ++i)
   1.677 +				TextData[i] = char_type( source[i] & cc); 
   1.678 +
   1.679 +			TextBegin = TextData;
   1.680 +			TextSize = sizeWithoutHeader;
   1.681 +
   1.682 +			// delete original data because no longer needed
   1.683 +			delete [] pointerToStore;
   1.684 +		}
   1.685 +	}
   1.686 +
   1.687 +	//! converts whole text buffer to little endian
   1.688 +	template<class src_char_type>
   1.689 +	void convertToLittleEndian(src_char_type* t)
   1.690 +	{
   1.691 +		if (sizeof(src_char_type) == 4) 
   1.692 +		{
   1.693 +			// 32 bit
   1.694 +
   1.695 +			while(*t)
   1.696 +			{
   1.697 +				*t = ((*t & 0xff000000) >> 24) |
   1.698 +				     ((*t & 0x00ff0000) >> 8)  |
   1.699 +				     ((*t & 0x0000ff00) << 8)  |
   1.700 +				     ((*t & 0x000000ff) << 24);
   1.701 +				++t;
   1.702 +			}
   1.703 +		}
   1.704 +		else
   1.705 +		{
   1.706 +			// 16 bit 
   1.707 +
   1.708 +			while(*t)
   1.709 +			{
   1.710 +				*t = (*t >> 8) | (*t << 8);
   1.711 +				++t;
   1.712 +			}
   1.713 +		}
   1.714 +	}
   1.715 +
   1.716 +	//! returns if a format is little endian
   1.717 +	inline bool isLittleEndian(ETEXT_FORMAT f)
   1.718 +	{
   1.719 +		return f == ETF_ASCII ||
   1.720 +		       f == ETF_UTF8 ||
   1.721 +		       f == ETF_UTF16_LE ||
   1.722 +		       f == ETF_UTF32_LE;
   1.723 +	}
   1.724 +
   1.725 +
   1.726 +	//! returns true if a character is whitespace
   1.727 +	inline bool isWhiteSpace(char_type c)
   1.728 +	{
   1.729 +		return (c==' ' || c=='\t' || c=='\n' || c=='\r');
   1.730 +	}
   1.731 +
   1.732 +
   1.733 +	//! generates a list with xml special characters
   1.734 +	void createSpecialCharacterList()
   1.735 +	{
   1.736 +		// list of strings containing special symbols, 
   1.737 +		// the first character is the special character,
   1.738 +		// the following is the symbol string without trailing &.
   1.739 +
   1.740 +		SpecialCharacters.push_back("&amp;");
   1.741 +		SpecialCharacters.push_back("<lt;");
   1.742 +		SpecialCharacters.push_back(">gt;");
   1.743 +		SpecialCharacters.push_back("\"quot;");
   1.744 +		SpecialCharacters.push_back("'apos;");
   1.745 +		
   1.746 +	}
   1.747 +
   1.748 +
   1.749 +	//! compares the first n characters of the strings
   1.750 +	bool equalsn(const char_type* str1, const char_type* str2, int len)
   1.751 +	{
   1.752 +		int i;
   1.753 +		for(i=0; str1[i] && str2[i] && i < len; ++i)
   1.754 +			if (str1[i] != str2[i])
   1.755 +				return false;
   1.756 +
   1.757 +		// if one (or both) of the strings was smaller then they
   1.758 +		// are only equal if they have the same lenght
   1.759 +		return (i == len) || (str1[i] == 0 && str2[i] == 0);
   1.760 +	}
   1.761 +
   1.762 +
   1.763 +	//! stores the target text format
   1.764 +	void storeTargetFormat()
   1.765 +	{
   1.766 +		// get target format. We could have done this using template specialization,
   1.767 +		// but VisualStudio 6 don't like it and we want to support it.
   1.768 +
   1.769 +		switch(sizeof(char_type))
   1.770 +		{
   1.771 +		case 1: 
   1.772 +			TargetFormat = ETF_UTF8;
   1.773 +			break;
   1.774 +		case 2: 
   1.775 +			TargetFormat = ETF_UTF16_LE;
   1.776 +			break;
   1.777 +		case 4: 
   1.778 +			TargetFormat = ETF_UTF32_LE;
   1.779 +			break;
   1.780 +		default:
   1.781 +			TargetFormat = ETF_ASCII; // should never happen.
   1.782 +		}
   1.783 +	}
   1.784 +
   1.785 +
	// instance variables:

	char_type* TextData;         // data block of the text file, deleted in the destructor
	char_type* P;                // current point in text to parse
	char_type* TextBegin;        // start of text to parse (behind the byte order mark)
	unsigned int TextSize;       // size of text to parse in characters, not bytes

	EXML_NODE CurrentNodeType;   // type of the currently parsed node
	ETEXT_FORMAT SourceFormat;   // source format of the xml file
	ETEXT_FORMAT TargetFormat;   // output format of this parser

	core::string<char_type> NodeName;    // name of the current element, or the content of the current text, comment or CDATA node
	core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods

	bool IsEmptyElement;       // is the currently parsed node empty?

	core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()

	core::array<SAttribute> Attributes; // attributes of current element
   1.805 +	
   1.806 +}; // end CXMLReaderImpl
   1.807 +
   1.808 +
   1.809 +} // end namespace
   1.810 +} // end namespace
   1.811 +
   1.812 +#endif
author	John Tsiombikas <nuclear@member.fsf.org>
date	Sat, 01 Feb 2014 19:58:19 +0200
parents
children