rev |
line source |
nuclear@0
|
1 // Copyright (C) 2002-2005 Nikolaus Gebhardt
|
nuclear@0
|
2 // This file is part of the "Irrlicht Engine" and the "irrXML" project.
|
nuclear@0
|
3 // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
|
nuclear@0
|
4
|
nuclear@0
|
5 #ifndef __ICXML_READER_IMPL_H_INCLUDED__
|
nuclear@0
|
6 #define __ICXML_READER_IMPL_H_INCLUDED__
|
nuclear@0
|
7
|
nuclear@0
|
8 #include "irrXML.h"
|
nuclear@0
|
9 #include "irrString.h"
|
nuclear@0
|
10 #include "irrArray.h"
|
nuclear@0
|
11
|
nuclear@0
|
12 using namespace Assimp;
|
nuclear@0
|
13
|
nuclear@0
|
// Debug-only print helper: expands to a printf call in _DEBUG builds and to
// nothing otherwise. The message is passed as an argument to a fixed "%s"
// format so that a '%' inside the message is printed literally instead of
// being interpreted as a conversion specification.
// The trailing semicolon inside the macro is kept so that existing call
// sites written without one keep compiling.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf("%s", (x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
|
nuclear@0
|
19
|
nuclear@0
|
20
|
nuclear@0
|
21 namespace irr
|
nuclear@0
|
22 {
|
nuclear@0
|
23 namespace io
|
nuclear@0
|
24 {
|
nuclear@0
|
25
|
nuclear@0
|
26
|
nuclear@0
|
27 //! implementation of the IrrXMLReader
|
nuclear@0
|
28 template<class char_type, class superclass>
|
nuclear@0
|
29 class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
|
nuclear@0
|
30 {
|
nuclear@0
|
31 public:
|
nuclear@0
|
32
|
nuclear@0
|
33 //! Constructor
|
nuclear@0
|
34 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
|
nuclear@0
|
35 : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
|
nuclear@0
|
36 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)
|
nuclear@0
|
37 {
|
nuclear@0
|
38 if (!callback)
|
nuclear@0
|
39 return;
|
nuclear@0
|
40
|
nuclear@0
|
41 storeTargetFormat();
|
nuclear@0
|
42
|
nuclear@0
|
43 // read whole xml file
|
nuclear@0
|
44
|
nuclear@0
|
45 readFile(callback);
|
nuclear@0
|
46
|
nuclear@0
|
47 // clean up
|
nuclear@0
|
48
|
nuclear@0
|
49 if (deleteCallBack)
|
nuclear@0
|
50 delete callback;
|
nuclear@0
|
51
|
nuclear@0
|
52 // create list with special characters
|
nuclear@0
|
53
|
nuclear@0
|
54 createSpecialCharacterList();
|
nuclear@0
|
55
|
nuclear@0
|
56 // set pointer to text begin
|
nuclear@0
|
57 P = TextBegin;
|
nuclear@0
|
58 }
|
nuclear@0
|
59
|
nuclear@0
|
60
|
nuclear@0
|
61 //! Destructor
|
nuclear@0
|
62 virtual ~CXMLReaderImpl()
|
nuclear@0
|
63 {
|
nuclear@0
|
64 delete [] TextData;
|
nuclear@0
|
65 }
|
nuclear@0
|
66
|
nuclear@0
|
67
|
nuclear@0
|
68 //! Reads forward to the next xml node.
|
nuclear@0
|
69 //! \return Returns false, if there was no further node.
|
nuclear@0
|
70 virtual bool read()
|
nuclear@0
|
71 {
|
nuclear@0
|
72 // if not end reached, parse the node
|
nuclear@0
|
73 if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)
|
nuclear@0
|
74 {
|
nuclear@0
|
75 parseCurrentNode();
|
nuclear@0
|
76 return true;
|
nuclear@0
|
77 }
|
nuclear@0
|
78
|
nuclear@0
|
79 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
|
nuclear@0
|
80 return false;
|
nuclear@0
|
81 }
|
nuclear@0
|
82
|
nuclear@0
|
83
|
nuclear@0
|
84 //! Returns the type of the current XML node.
|
nuclear@0
|
85 virtual EXML_NODE getNodeType() const
|
nuclear@0
|
86 {
|
nuclear@0
|
87 return CurrentNodeType;
|
nuclear@0
|
88 }
|
nuclear@0
|
89
|
nuclear@0
|
90
|
nuclear@0
|
91 //! Returns attribute count of the current XML node.
|
nuclear@0
|
92 virtual int getAttributeCount() const
|
nuclear@0
|
93 {
|
nuclear@0
|
94 return Attributes.size();
|
nuclear@0
|
95 }
|
nuclear@0
|
96
|
nuclear@0
|
97
|
nuclear@0
|
98 //! Returns name of an attribute.
|
nuclear@0
|
99 virtual const char_type* getAttributeName(int idx) const
|
nuclear@0
|
100 {
|
nuclear@0
|
101 if (idx < 0 || idx >= (int)Attributes.size())
|
nuclear@0
|
102 return 0;
|
nuclear@0
|
103
|
nuclear@0
|
104 return Attributes[idx].Name.c_str();
|
nuclear@0
|
105 }
|
nuclear@0
|
106
|
nuclear@0
|
107
|
nuclear@0
|
108 //! Returns the value of an attribute.
|
nuclear@0
|
109 virtual const char_type* getAttributeValue(int idx) const
|
nuclear@0
|
110 {
|
nuclear@0
|
111 if (idx < 0 || idx >= (int)Attributes.size())
|
nuclear@0
|
112 return 0;
|
nuclear@0
|
113
|
nuclear@0
|
114 return Attributes[idx].Value.c_str();
|
nuclear@0
|
115 }
|
nuclear@0
|
116
|
nuclear@0
|
117
|
nuclear@0
|
118 //! Returns the value of an attribute.
|
nuclear@0
|
119 virtual const char_type* getAttributeValue(const char_type* name) const
|
nuclear@0
|
120 {
|
nuclear@0
|
121 const SAttribute* attr = getAttributeByName(name);
|
nuclear@0
|
122 if (!attr)
|
nuclear@0
|
123 return 0;
|
nuclear@0
|
124
|
nuclear@0
|
125 return attr->Value.c_str();
|
nuclear@0
|
126 }
|
nuclear@0
|
127
|
nuclear@0
|
128
|
nuclear@0
|
129 //! Returns the value of an attribute
|
nuclear@0
|
130 virtual const char_type* getAttributeValueSafe(const char_type* name) const
|
nuclear@0
|
131 {
|
nuclear@0
|
132 const SAttribute* attr = getAttributeByName(name);
|
nuclear@0
|
133 if (!attr)
|
nuclear@0
|
134 return EmptyString.c_str();
|
nuclear@0
|
135
|
nuclear@0
|
136 return attr->Value.c_str();
|
nuclear@0
|
137 }
|
nuclear@0
|
138
|
nuclear@0
|
139
|
nuclear@0
|
140
|
nuclear@0
|
141 //! Returns the value of an attribute as integer.
|
nuclear@0
|
142 int getAttributeValueAsInt(const char_type* name) const
|
nuclear@0
|
143 {
|
nuclear@0
|
144 return (int)getAttributeValueAsFloat(name);
|
nuclear@0
|
145 }
|
nuclear@0
|
146
|
nuclear@0
|
147
|
nuclear@0
|
148 //! Returns the value of an attribute as integer.
|
nuclear@0
|
149 int getAttributeValueAsInt(int idx) const
|
nuclear@0
|
150 {
|
nuclear@0
|
151 return (int)getAttributeValueAsFloat(idx);
|
nuclear@0
|
152 }
|
nuclear@0
|
153
|
nuclear@0
|
154
|
nuclear@0
|
155 //! Returns the value of an attribute as float.
|
nuclear@0
|
156 float getAttributeValueAsFloat(const char_type* name) const
|
nuclear@0
|
157 {
|
nuclear@0
|
158 const SAttribute* attr = getAttributeByName(name);
|
nuclear@0
|
159 if (!attr)
|
nuclear@0
|
160 return 0;
|
nuclear@0
|
161
|
nuclear@0
|
162 core::stringc c = attr->Value.c_str();
|
nuclear@0
|
163 return fast_atof(c.c_str());
|
nuclear@0
|
164 }
|
nuclear@0
|
165
|
nuclear@0
|
166
|
nuclear@0
|
167 //! Returns the value of an attribute as float.
|
nuclear@0
|
168 float getAttributeValueAsFloat(int idx) const
|
nuclear@0
|
169 {
|
nuclear@0
|
170 const char_type* attrvalue = getAttributeValue(idx);
|
nuclear@0
|
171 if (!attrvalue)
|
nuclear@0
|
172 return 0;
|
nuclear@0
|
173
|
nuclear@0
|
174 core::stringc c = attrvalue;
|
nuclear@0
|
175 return fast_atof(c.c_str());
|
nuclear@0
|
176 }
|
nuclear@0
|
177
|
nuclear@0
|
178
|
nuclear@0
|
179 //! Returns the name of the current node.
|
nuclear@0
|
180 virtual const char_type* getNodeName() const
|
nuclear@0
|
181 {
|
nuclear@0
|
182 return NodeName.c_str();
|
nuclear@0
|
183 }
|
nuclear@0
|
184
|
nuclear@0
|
185
|
nuclear@0
|
186 //! Returns data of the current node.
|
nuclear@0
|
187 virtual const char_type* getNodeData() const
|
nuclear@0
|
188 {
|
nuclear@0
|
189 return NodeName.c_str();
|
nuclear@0
|
190 }
|
nuclear@0
|
191
|
nuclear@0
|
192
|
nuclear@0
|
193 //! Returns if an element is an empty element, like <foo />
|
nuclear@0
|
194 virtual bool isEmptyElement() const
|
nuclear@0
|
195 {
|
nuclear@0
|
196 return IsEmptyElement;
|
nuclear@0
|
197 }
|
nuclear@0
|
198
|
nuclear@0
|
199 //! Returns format of the source xml file.
|
nuclear@0
|
200 virtual ETEXT_FORMAT getSourceFormat() const
|
nuclear@0
|
201 {
|
nuclear@0
|
202 return SourceFormat;
|
nuclear@0
|
203 }
|
nuclear@0
|
204
|
nuclear@0
|
205 //! Returns format of the strings returned by the parser.
|
nuclear@0
|
206 virtual ETEXT_FORMAT getParserFormat() const
|
nuclear@0
|
207 {
|
nuclear@0
|
208 return TargetFormat;
|
nuclear@0
|
209 }
|
nuclear@0
|
210
|
nuclear@0
|
211 private:
|
nuclear@0
|
212
|
nuclear@0
|
213 // Reads the current xml node
|
nuclear@0
|
214 void parseCurrentNode()
|
nuclear@0
|
215 {
|
nuclear@0
|
216 char_type* start = P;
|
nuclear@0
|
217
|
nuclear@0
|
218 // more forward until '<' found
|
nuclear@0
|
219 while(*P != L'<' && *P)
|
nuclear@0
|
220 ++P;
|
nuclear@0
|
221
|
nuclear@0
|
222 if (!*P)
|
nuclear@0
|
223 return;
|
nuclear@0
|
224
|
nuclear@0
|
225 if (P - start > 0)
|
nuclear@0
|
226 {
|
nuclear@0
|
227 // we found some text, store it
|
nuclear@0
|
228 if (setText(start, P))
|
nuclear@0
|
229 return;
|
nuclear@0
|
230 }
|
nuclear@0
|
231
|
nuclear@0
|
232 ++P;
|
nuclear@0
|
233
|
nuclear@0
|
234 // based on current token, parse and report next element
|
nuclear@0
|
235 switch(*P)
|
nuclear@0
|
236 {
|
nuclear@0
|
237 case L'/':
|
nuclear@0
|
238 parseClosingXMLElement();
|
nuclear@0
|
239 break;
|
nuclear@0
|
240 case L'?':
|
nuclear@0
|
241 ignoreDefinition();
|
nuclear@0
|
242 break;
|
nuclear@0
|
243 case L'!':
|
nuclear@0
|
244 if (!parseCDATA())
|
nuclear@0
|
245 parseComment();
|
nuclear@0
|
246 break;
|
nuclear@0
|
247 default:
|
nuclear@0
|
248 parseOpeningXMLElement();
|
nuclear@0
|
249 break;
|
nuclear@0
|
250 }
|
nuclear@0
|
251 }
|
nuclear@0
|
252
|
nuclear@0
|
253
|
nuclear@0
|
254 //! sets the state that text was found. Returns true if set should be set
|
nuclear@0
|
255 bool setText(char_type* start, char_type* end)
|
nuclear@0
|
256 {
|
nuclear@0
|
257 // check if text is more than 2 characters, and if not, check if there is
|
nuclear@0
|
258 // only white space, so that this text won't be reported
|
nuclear@0
|
259 if (end - start < 3)
|
nuclear@0
|
260 {
|
nuclear@0
|
261 char_type* p = start;
|
nuclear@0
|
262 for(; p != end; ++p)
|
nuclear@0
|
263 if (!isWhiteSpace(*p))
|
nuclear@0
|
264 break;
|
nuclear@0
|
265
|
nuclear@0
|
266 if (p == end)
|
nuclear@0
|
267 return false;
|
nuclear@0
|
268 }
|
nuclear@0
|
269
|
nuclear@0
|
270 // set current text to the parsed text, and replace xml special characters
|
nuclear@0
|
271 core::string<char_type> s(start, (int)(end - start));
|
nuclear@0
|
272 NodeName = replaceSpecialCharacters(s);
|
nuclear@0
|
273
|
nuclear@0
|
274 // current XML node type is text
|
nuclear@0
|
275 CurrentNodeType = EXN_TEXT;
|
nuclear@0
|
276
|
nuclear@0
|
277 return true;
|
nuclear@0
|
278 }
|
nuclear@0
|
279
|
nuclear@0
|
280
|
nuclear@0
|
281
|
nuclear@0
|
282 //! ignores an xml definition like <?xml something />
|
nuclear@0
|
283 void ignoreDefinition()
|
nuclear@0
|
284 {
|
nuclear@0
|
285 CurrentNodeType = EXN_UNKNOWN;
|
nuclear@0
|
286
|
nuclear@0
|
287 // move until end marked with '>' reached
|
nuclear@0
|
288 while(*P != L'>')
|
nuclear@0
|
289 ++P;
|
nuclear@0
|
290
|
nuclear@0
|
291 ++P;
|
nuclear@0
|
292 }
|
nuclear@0
|
293
|
nuclear@0
|
294
|
nuclear@0
|
295 //! parses a comment
|
nuclear@0
|
296 void parseComment()
|
nuclear@0
|
297 {
|
nuclear@0
|
298 CurrentNodeType = EXN_COMMENT;
|
nuclear@0
|
299 P += 1;
|
nuclear@0
|
300
|
nuclear@0
|
301 char_type *pCommentBegin = P;
|
nuclear@0
|
302
|
nuclear@0
|
303 int count = 1;
|
nuclear@0
|
304
|
nuclear@0
|
305 // move until end of comment reached
|
nuclear@0
|
306 while(count)
|
nuclear@0
|
307 {
|
nuclear@0
|
308 if (*P == L'>')
|
nuclear@0
|
309 --count;
|
nuclear@0
|
310 else
|
nuclear@0
|
311 if (*P == L'<')
|
nuclear@0
|
312 ++count;
|
nuclear@0
|
313
|
nuclear@0
|
314 ++P;
|
nuclear@0
|
315 }
|
nuclear@0
|
316
|
nuclear@0
|
317 P -= 3;
|
nuclear@0
|
318 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
|
nuclear@0
|
319 P += 3;
|
nuclear@0
|
320 }
|
nuclear@0
|
321
|
nuclear@0
|
322
|
nuclear@0
|
323 //! parses an opening xml element and reads attributes
|
nuclear@0
|
324 void parseOpeningXMLElement()
|
nuclear@0
|
325 {
|
nuclear@0
|
326 CurrentNodeType = EXN_ELEMENT;
|
nuclear@0
|
327 IsEmptyElement = false;
|
nuclear@0
|
328 Attributes.clear();
|
nuclear@0
|
329
|
nuclear@0
|
330 // find name
|
nuclear@0
|
331 const char_type* startName = P;
|
nuclear@0
|
332
|
nuclear@0
|
333 // find end of element
|
nuclear@0
|
334 while(*P != L'>' && !isWhiteSpace(*P))
|
nuclear@0
|
335 ++P;
|
nuclear@0
|
336
|
nuclear@0
|
337 const char_type* endName = P;
|
nuclear@0
|
338
|
nuclear@0
|
339 // find Attributes
|
nuclear@0
|
340 while(*P != L'>')
|
nuclear@0
|
341 {
|
nuclear@0
|
342 if (isWhiteSpace(*P))
|
nuclear@0
|
343 ++P;
|
nuclear@0
|
344 else
|
nuclear@0
|
345 {
|
nuclear@0
|
346 if (*P != L'/')
|
nuclear@0
|
347 {
|
nuclear@0
|
348 // we've got an attribute
|
nuclear@0
|
349
|
nuclear@0
|
350 // read the attribute names
|
nuclear@0
|
351 const char_type* attributeNameBegin = P;
|
nuclear@0
|
352
|
nuclear@0
|
353 while(!isWhiteSpace(*P) && *P != L'=')
|
nuclear@0
|
354 ++P;
|
nuclear@0
|
355
|
nuclear@0
|
356 const char_type* attributeNameEnd = P;
|
nuclear@0
|
357 ++P;
|
nuclear@0
|
358
|
nuclear@0
|
359 // read the attribute value
|
nuclear@0
|
360 // check for quotes and single quotes, thx to murphy
|
nuclear@0
|
361 while( (*P != L'\"') && (*P != L'\'') && *P)
|
nuclear@0
|
362 ++P;
|
nuclear@0
|
363
|
nuclear@0
|
364 if (!*P) // malformatted xml file
|
nuclear@0
|
365 return;
|
nuclear@0
|
366
|
nuclear@0
|
367 const char_type attributeQuoteChar = *P;
|
nuclear@0
|
368
|
nuclear@0
|
369 ++P;
|
nuclear@0
|
370 const char_type* attributeValueBegin = P;
|
nuclear@0
|
371
|
nuclear@0
|
372 while(*P != attributeQuoteChar && *P)
|
nuclear@0
|
373 ++P;
|
nuclear@0
|
374
|
nuclear@0
|
375 if (!*P) // malformatted xml file
|
nuclear@0
|
376 return;
|
nuclear@0
|
377
|
nuclear@0
|
378 const char_type* attributeValueEnd = P;
|
nuclear@0
|
379 ++P;
|
nuclear@0
|
380
|
nuclear@0
|
381 SAttribute attr;
|
nuclear@0
|
382 attr.Name = core::string<char_type>(attributeNameBegin,
|
nuclear@0
|
383 (int)(attributeNameEnd - attributeNameBegin));
|
nuclear@0
|
384
|
nuclear@0
|
385 core::string<char_type> s(attributeValueBegin,
|
nuclear@0
|
386 (int)(attributeValueEnd - attributeValueBegin));
|
nuclear@0
|
387
|
nuclear@0
|
388 attr.Value = replaceSpecialCharacters(s);
|
nuclear@0
|
389 Attributes.push_back(attr);
|
nuclear@0
|
390 }
|
nuclear@0
|
391 else
|
nuclear@0
|
392 {
|
nuclear@0
|
393 // tag is closed directly
|
nuclear@0
|
394 ++P;
|
nuclear@0
|
395 IsEmptyElement = true;
|
nuclear@0
|
396 break;
|
nuclear@0
|
397 }
|
nuclear@0
|
398 }
|
nuclear@0
|
399 }
|
nuclear@0
|
400
|
nuclear@0
|
401 // check if this tag is closing directly
|
nuclear@0
|
402 if (endName > startName && *(endName-1) == L'/')
|
nuclear@0
|
403 {
|
nuclear@0
|
404 // directly closing tag
|
nuclear@0
|
405 IsEmptyElement = true;
|
nuclear@0
|
406 endName--;
|
nuclear@0
|
407 }
|
nuclear@0
|
408
|
nuclear@0
|
409 NodeName = core::string<char_type>(startName, (int)(endName - startName));
|
nuclear@0
|
410
|
nuclear@0
|
411 ++P;
|
nuclear@0
|
412 }
|
nuclear@0
|
413
|
nuclear@0
|
414
|
nuclear@0
|
415 //! parses an closing xml tag
|
nuclear@0
|
416 void parseClosingXMLElement()
|
nuclear@0
|
417 {
|
nuclear@0
|
418 CurrentNodeType = EXN_ELEMENT_END;
|
nuclear@0
|
419 IsEmptyElement = false;
|
nuclear@0
|
420 Attributes.clear();
|
nuclear@0
|
421
|
nuclear@0
|
422 ++P;
|
nuclear@0
|
423 const char_type* pBeginClose = P;
|
nuclear@0
|
424
|
nuclear@0
|
425 while(*P != L'>')
|
nuclear@0
|
426 ++P;
|
nuclear@0
|
427
|
nuclear@0
|
428 // remove trailing whitespace, if any
|
nuclear@0
|
429 while( isspace( P[-1]))
|
nuclear@0
|
430 --P;
|
nuclear@0
|
431
|
nuclear@0
|
432 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
|
nuclear@0
|
433 ++P;
|
nuclear@0
|
434 }
|
nuclear@0
|
435
|
nuclear@0
|
436 //! parses a possible CDATA section, returns false if begin was not a CDATA section
|
nuclear@0
|
437 bool parseCDATA()
|
nuclear@0
|
438 {
|
nuclear@0
|
439 if (*(P+1) != L'[')
|
nuclear@0
|
440 return false;
|
nuclear@0
|
441
|
nuclear@0
|
442 CurrentNodeType = EXN_CDATA;
|
nuclear@0
|
443
|
nuclear@0
|
444 // skip '<![CDATA['
|
nuclear@0
|
445 int count=0;
|
nuclear@0
|
446 while( *P && count<8 )
|
nuclear@0
|
447 {
|
nuclear@0
|
448 ++P;
|
nuclear@0
|
449 ++count;
|
nuclear@0
|
450 }
|
nuclear@0
|
451
|
nuclear@0
|
452 if (!*P)
|
nuclear@0
|
453 return true;
|
nuclear@0
|
454
|
nuclear@0
|
455 char_type *cDataBegin = P;
|
nuclear@0
|
456 char_type *cDataEnd = 0;
|
nuclear@0
|
457
|
nuclear@0
|
458 // find end of CDATA
|
nuclear@0
|
459 while(*P && !cDataEnd)
|
nuclear@0
|
460 {
|
nuclear@0
|
461 if (*P == L'>' &&
|
nuclear@0
|
462 (*(P-1) == L']') &&
|
nuclear@0
|
463 (*(P-2) == L']'))
|
nuclear@0
|
464 {
|
nuclear@0
|
465 cDataEnd = P - 2;
|
nuclear@0
|
466 }
|
nuclear@0
|
467
|
nuclear@0
|
468 ++P;
|
nuclear@0
|
469 }
|
nuclear@0
|
470
|
nuclear@0
|
471 if ( cDataEnd )
|
nuclear@0
|
472 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
|
nuclear@0
|
473 else
|
nuclear@0
|
474 NodeName = "";
|
nuclear@0
|
475
|
nuclear@0
|
476 return true;
|
nuclear@0
|
477 }
|
nuclear@0
|
478
|
nuclear@0
|
479
|
nuclear@0
|
480 // structure for storing attribute-name pairs
|
nuclear@0
|
481 struct SAttribute
|
nuclear@0
|
482 {
|
nuclear@0
|
483 core::string<char_type> Name;
|
nuclear@0
|
484 core::string<char_type> Value;
|
nuclear@0
|
485 };
|
nuclear@0
|
486
|
nuclear@0
|
487 // finds a current attribute by name, returns 0 if not found
|
nuclear@0
|
488 const SAttribute* getAttributeByName(const char_type* name) const
|
nuclear@0
|
489 {
|
nuclear@0
|
490 if (!name)
|
nuclear@0
|
491 return 0;
|
nuclear@0
|
492
|
nuclear@0
|
493 core::string<char_type> n = name;
|
nuclear@0
|
494
|
nuclear@0
|
495 for (int i=0; i<(int)Attributes.size(); ++i)
|
nuclear@0
|
496 if (Attributes[i].Name == n)
|
nuclear@0
|
497 return &Attributes[i];
|
nuclear@0
|
498
|
nuclear@0
|
499 return 0;
|
nuclear@0
|
500 }
|
nuclear@0
|
501
|
nuclear@0
|
502 // replaces xml special characters in a string and creates a new one
|
nuclear@0
|
503 core::string<char_type> replaceSpecialCharacters(
|
nuclear@0
|
504 core::string<char_type>& origstr)
|
nuclear@0
|
505 {
|
nuclear@0
|
506 int pos = origstr.findFirst(L'&');
|
nuclear@0
|
507 int oldPos = 0;
|
nuclear@0
|
508
|
nuclear@0
|
509 if (pos == -1)
|
nuclear@0
|
510 return origstr;
|
nuclear@0
|
511
|
nuclear@0
|
512 core::string<char_type> newstr;
|
nuclear@0
|
513
|
nuclear@0
|
514 while(pos != -1 && pos < origstr.size()-2)
|
nuclear@0
|
515 {
|
nuclear@0
|
516 // check if it is one of the special characters
|
nuclear@0
|
517
|
nuclear@0
|
518 int specialChar = -1;
|
nuclear@0
|
519 for (int i=0; i<(int)SpecialCharacters.size(); ++i)
|
nuclear@0
|
520 {
|
nuclear@0
|
521 const char_type* p = &origstr.c_str()[pos]+1;
|
nuclear@0
|
522
|
nuclear@0
|
523 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
|
nuclear@0
|
524 {
|
nuclear@0
|
525 specialChar = i;
|
nuclear@0
|
526 break;
|
nuclear@0
|
527 }
|
nuclear@0
|
528 }
|
nuclear@0
|
529
|
nuclear@0
|
530 if (specialChar != -1)
|
nuclear@0
|
531 {
|
nuclear@0
|
532 newstr.append(origstr.subString(oldPos, pos - oldPos));
|
nuclear@0
|
533 newstr.append(SpecialCharacters[specialChar][0]);
|
nuclear@0
|
534 pos += SpecialCharacters[specialChar].size();
|
nuclear@0
|
535 }
|
nuclear@0
|
536 else
|
nuclear@0
|
537 {
|
nuclear@0
|
538 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
|
nuclear@0
|
539 pos += 1;
|
nuclear@0
|
540 }
|
nuclear@0
|
541
|
nuclear@0
|
542 // find next &
|
nuclear@0
|
543 oldPos = pos;
|
nuclear@0
|
544 pos = origstr.findNext(L'&', pos);
|
nuclear@0
|
545 }
|
nuclear@0
|
546
|
nuclear@0
|
547 if (oldPos < origstr.size()-1)
|
nuclear@0
|
548 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
|
nuclear@0
|
549
|
nuclear@0
|
550 return newstr;
|
nuclear@0
|
551 }
|
nuclear@0
|
552
|
nuclear@0
|
553
|
nuclear@0
|
554
|
nuclear@0
|
555 //! reads the xml file and converts it into the wanted character format.
|
nuclear@0
|
556 bool readFile(IFileReadCallBack* callback)
|
nuclear@0
|
557 {
|
nuclear@0
|
558 int size = callback->getSize();
|
nuclear@0
|
559 size += 4; // We need two terminating 0's at the end.
|
nuclear@0
|
560 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
|
nuclear@0
|
561
|
nuclear@0
|
562 char* data8 = new char[size];
|
nuclear@0
|
563
|
nuclear@0
|
564 if (!callback->read(data8, size-4))
|
nuclear@0
|
565 {
|
nuclear@0
|
566 delete [] data8;
|
nuclear@0
|
567 return false;
|
nuclear@0
|
568 }
|
nuclear@0
|
569
|
nuclear@0
|
570 // add zeros at end
|
nuclear@0
|
571
|
nuclear@0
|
572 data8[size-1] = 0;
|
nuclear@0
|
573 data8[size-2] = 0;
|
nuclear@0
|
574 data8[size-3] = 0;
|
nuclear@0
|
575 data8[size-4] = 0;
|
nuclear@0
|
576
|
nuclear@0
|
577 char16* data16 = reinterpret_cast<char16*>(data8);
|
nuclear@0
|
578 char32* data32 = reinterpret_cast<char32*>(data8);
|
nuclear@0
|
579
|
nuclear@0
|
580 // now we need to convert the data to the desired target format
|
nuclear@0
|
581 // based on the byte order mark.
|
nuclear@0
|
582
|
nuclear@0
|
583 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
|
nuclear@0
|
584 const int UTF16_BE = 0xFFFE;
|
nuclear@0
|
585 const int UTF16_LE = 0xFEFF;
|
nuclear@0
|
586 const int UTF32_BE = 0xFFFE0000;
|
nuclear@0
|
587 const int UTF32_LE = 0x0000FEFF;
|
nuclear@0
|
588
|
nuclear@0
|
589 // check source for all utf versions and convert to target data format
|
nuclear@0
|
590
|
nuclear@0
|
591 if (size >= 4 && data32[0] == (char32)UTF32_BE)
|
nuclear@0
|
592 {
|
nuclear@0
|
593 // UTF-32, big endian
|
nuclear@0
|
594 SourceFormat = ETF_UTF32_BE;
|
nuclear@0
|
595 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
|
nuclear@0
|
596 }
|
nuclear@0
|
597 else
|
nuclear@0
|
598 if (size >= 4 && data32[0] == (char32)UTF32_LE)
|
nuclear@0
|
599 {
|
nuclear@0
|
600 // UTF-32, little endian
|
nuclear@0
|
601 SourceFormat = ETF_UTF32_LE;
|
nuclear@0
|
602 convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
|
nuclear@0
|
603 }
|
nuclear@0
|
604 else
|
nuclear@0
|
605 if (size >= 2 && data16[0] == UTF16_BE)
|
nuclear@0
|
606 {
|
nuclear@0
|
607 // UTF-16, big endian
|
nuclear@0
|
608 SourceFormat = ETF_UTF16_BE;
|
nuclear@0
|
609 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
|
nuclear@0
|
610 }
|
nuclear@0
|
611 else
|
nuclear@0
|
612 if (size >= 2 && data16[0] == UTF16_LE)
|
nuclear@0
|
613 {
|
nuclear@0
|
614 // UTF-16, little endian
|
nuclear@0
|
615 SourceFormat = ETF_UTF16_LE;
|
nuclear@0
|
616 convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
|
nuclear@0
|
617 }
|
nuclear@0
|
618 else
|
nuclear@0
|
619 if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])
|
nuclear@0
|
620 {
|
nuclear@0
|
621 // UTF-8
|
nuclear@0
|
622 SourceFormat = ETF_UTF8;
|
nuclear@0
|
623 convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header
|
nuclear@0
|
624 }
|
nuclear@0
|
625 else
|
nuclear@0
|
626 {
|
nuclear@0
|
627 // ASCII
|
nuclear@0
|
628 SourceFormat = ETF_ASCII;
|
nuclear@0
|
629 convertTextData(data8, data8, size);
|
nuclear@0
|
630 }
|
nuclear@0
|
631
|
nuclear@0
|
632 return true;
|
nuclear@0
|
633 }
|
nuclear@0
|
634
|
nuclear@0
|
635
|
nuclear@0
|
636 //! converts the text file into the desired format.
|
nuclear@0
|
637 //! \param source: begin of the text (without byte order mark)
|
nuclear@0
|
638 //! \param pointerToStore: pointer to text data block which can be
|
nuclear@0
|
639 //! stored or deleted based on the nesessary conversion.
|
nuclear@0
|
640 //! \param sizeWithoutHeader: Text size in characters without header
|
nuclear@0
|
641 template<class src_char_type>
|
nuclear@0
|
642 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
|
nuclear@0
|
643 {
|
nuclear@0
|
644 // convert little to big endian if necessary
|
nuclear@0
|
645 if (sizeof(src_char_type) > 1 &&
|
nuclear@0
|
646 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
|
nuclear@0
|
647 convertToLittleEndian(source);
|
nuclear@0
|
648
|
nuclear@0
|
649 // check if conversion is necessary:
|
nuclear@0
|
650 if (sizeof(src_char_type) == sizeof(char_type))
|
nuclear@0
|
651 {
|
nuclear@0
|
652 // no need to convert
|
nuclear@0
|
653 TextBegin = (char_type*)source;
|
nuclear@0
|
654 TextData = (char_type*)pointerToStore;
|
nuclear@0
|
655 TextSize = sizeWithoutHeader;
|
nuclear@0
|
656 }
|
nuclear@0
|
657 else
|
nuclear@0
|
658 {
|
nuclear@0
|
659 // convert source into target data format.
|
nuclear@0
|
660 // TODO: implement a real conversion. This one just
|
nuclear@0
|
661 // copies bytes. This is a problem when there are
|
nuclear@0
|
662 // unicode symbols using more than one character.
|
nuclear@0
|
663
|
nuclear@0
|
664 TextData = new char_type[sizeWithoutHeader];
|
nuclear@0
|
665
|
nuclear@0
|
666 // MSVC debugger complains here about loss of data ...
|
nuclear@0
|
667
|
nuclear@0
|
668
|
nuclear@0
|
669 // FIXME - gcc complains about 'shift width larger than width of type'
|
nuclear@0
|
670 // for T == unsigned long. Avoid it by messing around volatile ..
|
nuclear@0
|
671 volatile unsigned int c = 3;
|
nuclear@0
|
672 const src_char_type cc = (src_char_type)((((uint64_t)1u << (sizeof( char_type)<<c)) - 1));
|
nuclear@0
|
673 for (int i=0; i<sizeWithoutHeader; ++i)
|
nuclear@0
|
674 TextData[i] = char_type( source[i] & cc);
|
nuclear@0
|
675
|
nuclear@0
|
676 TextBegin = TextData;
|
nuclear@0
|
677 TextSize = sizeWithoutHeader;
|
nuclear@0
|
678
|
nuclear@0
|
679 // delete original data because no longer needed
|
nuclear@0
|
680 delete [] pointerToStore;
|
nuclear@0
|
681 }
|
nuclear@0
|
682 }
|
nuclear@0
|
683
|
nuclear@0
|
684 //! converts whole text buffer to little endian
|
nuclear@0
|
685 template<class src_char_type>
|
nuclear@0
|
686 void convertToLittleEndian(src_char_type* t)
|
nuclear@0
|
687 {
|
nuclear@0
|
688 if (sizeof(src_char_type) == 4)
|
nuclear@0
|
689 {
|
nuclear@0
|
690 // 32 bit
|
nuclear@0
|
691
|
nuclear@0
|
692 while(*t)
|
nuclear@0
|
693 {
|
nuclear@0
|
694 *t = ((*t & 0xff000000) >> 24) |
|
nuclear@0
|
695 ((*t & 0x00ff0000) >> 8) |
|
nuclear@0
|
696 ((*t & 0x0000ff00) << 8) |
|
nuclear@0
|
697 ((*t & 0x000000ff) << 24);
|
nuclear@0
|
698 ++t;
|
nuclear@0
|
699 }
|
nuclear@0
|
700 }
|
nuclear@0
|
701 else
|
nuclear@0
|
702 {
|
nuclear@0
|
703 // 16 bit
|
nuclear@0
|
704
|
nuclear@0
|
705 while(*t)
|
nuclear@0
|
706 {
|
nuclear@0
|
707 *t = (*t >> 8) | (*t << 8);
|
nuclear@0
|
708 ++t;
|
nuclear@0
|
709 }
|
nuclear@0
|
710 }
|
nuclear@0
|
711 }
|
nuclear@0
|
712
|
nuclear@0
|
713 //! returns if a format is little endian
|
nuclear@0
|
714 inline bool isLittleEndian(ETEXT_FORMAT f)
|
nuclear@0
|
715 {
|
nuclear@0
|
716 return f == ETF_ASCII ||
|
nuclear@0
|
717 f == ETF_UTF8 ||
|
nuclear@0
|
718 f == ETF_UTF16_LE ||
|
nuclear@0
|
719 f == ETF_UTF32_LE;
|
nuclear@0
|
720 }
|
nuclear@0
|
721
|
nuclear@0
|
722
|
nuclear@0
|
723 //! returns true if a character is whitespace
|
nuclear@0
|
724 inline bool isWhiteSpace(char_type c)
|
nuclear@0
|
725 {
|
nuclear@0
|
726 return (c==' ' || c=='\t' || c=='\n' || c=='\r');
|
nuclear@0
|
727 }
|
nuclear@0
|
728
|
nuclear@0
|
729
|
nuclear@0
|
730 //! generates a list with xml special characters
|
nuclear@0
|
731 void createSpecialCharacterList()
|
nuclear@0
|
732 {
|
nuclear@0
|
733 // list of strings containing special symbols,
|
nuclear@0
|
734 // the first character is the special character,
|
nuclear@0
|
735 // the following is the symbol string without trailing &.
|
nuclear@0
|
736
|
nuclear@0
|
737 SpecialCharacters.push_back("&");
|
nuclear@0
|
738 SpecialCharacters.push_back("<lt;");
|
nuclear@0
|
739 SpecialCharacters.push_back(">gt;");
|
nuclear@0
|
740 SpecialCharacters.push_back("\"quot;");
|
nuclear@0
|
741 SpecialCharacters.push_back("'apos;");
|
nuclear@0
|
742
|
nuclear@0
|
743 }
|
nuclear@0
|
744
|
nuclear@0
|
745
|
nuclear@0
|
746 //! compares the first n characters of the strings
|
nuclear@0
|
747 bool equalsn(const char_type* str1, const char_type* str2, int len)
|
nuclear@0
|
748 {
|
nuclear@0
|
749 int i;
|
nuclear@0
|
750 for(i=0; str1[i] && str2[i] && i < len; ++i)
|
nuclear@0
|
751 if (str1[i] != str2[i])
|
nuclear@0
|
752 return false;
|
nuclear@0
|
753
|
nuclear@0
|
754 // if one (or both) of the strings was smaller then they
|
nuclear@0
|
755 // are only equal if they have the same lenght
|
nuclear@0
|
756 return (i == len) || (str1[i] == 0 && str2[i] == 0);
|
nuclear@0
|
757 }
|
nuclear@0
|
758
|
nuclear@0
|
759
|
nuclear@0
|
760 //! stores the target text format
|
nuclear@0
|
761 void storeTargetFormat()
|
nuclear@0
|
762 {
|
nuclear@0
|
763 // get target format. We could have done this using template specialization,
|
nuclear@0
|
764 // but VisualStudio 6 don't like it and we want to support it.
|
nuclear@0
|
765
|
nuclear@0
|
766 switch(sizeof(char_type))
|
nuclear@0
|
767 {
|
nuclear@0
|
768 case 1:
|
nuclear@0
|
769 TargetFormat = ETF_UTF8;
|
nuclear@0
|
770 break;
|
nuclear@0
|
771 case 2:
|
nuclear@0
|
772 TargetFormat = ETF_UTF16_LE;
|
nuclear@0
|
773 break;
|
nuclear@0
|
774 case 4:
|
nuclear@0
|
775 TargetFormat = ETF_UTF32_LE;
|
nuclear@0
|
776 break;
|
nuclear@0
|
777 default:
|
nuclear@0
|
778 TargetFormat = ETF_ASCII; // should never happen.
|
nuclear@0
|
779 }
|
nuclear@0
|
780 }
|
nuclear@0
|
781
|
nuclear@0
|
782
|
nuclear@0
|
783 // instance variables:
|
nuclear@0
|
784
|
nuclear@0
|
785 char_type* TextData; // data block of the text file
|
nuclear@0
|
786 char_type* P; // current point in text to parse
|
nuclear@0
|
787 char_type* TextBegin; // start of text to parse
|
nuclear@0
|
788 unsigned int TextSize; // size of text to parse in characters, not bytes
|
nuclear@0
|
789
|
nuclear@0
|
790 EXML_NODE CurrentNodeType; // type of the currently parsed node
|
nuclear@0
|
791 ETEXT_FORMAT SourceFormat; // source format of the xml file
|
nuclear@0
|
792 ETEXT_FORMAT TargetFormat; // output format of this parser
|
nuclear@0
|
793
|
nuclear@0
|
794 core::string<char_type> NodeName; // name of the node currently in
|
nuclear@0
|
795 core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods
|
nuclear@0
|
796
|
nuclear@0
|
797 bool IsEmptyElement; // is the currently parsed node empty?
|
nuclear@0
|
798
|
nuclear@0
|
799 core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()
|
nuclear@0
|
800
|
nuclear@0
|
801 core::array<SAttribute> Attributes; // attributes of current element
|
nuclear@0
|
802
|
nuclear@0
|
803 }; // end CXMLReaderImpl
|
nuclear@0
|
804
|
nuclear@0
|
805
|
nuclear@0
|
806 } // end namespace
|
nuclear@0
|
807 } // end namespace
|
nuclear@0
|
808
|
nuclear@0
|
809 #endif
|