/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2012, assimp team
All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/

/** @file  FBXBinaryTokenizer.cpp
 *  @brief Implementation of a fake lexer for binary fbx files -
 *    we emit tokens so the parser needs almost no special handling
 *    for binary files.
 */
#include "AssimpPCH.h"

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"

namespace Assimp {
namespace FBX {


// ------------------------------------------------------------------------------------------------
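// Construct a token for a binary FBX file. Binary files carry no line/column information, so the
// byte offset of the token is stored in the 'line' field and 'column' is set to BINARY_MARKER.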
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
    : sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)
    , column(BINARY_MARKER)
#ifdef DEBUG
    , contents(sbegin, static_cast<size_t>(send-sbegin))
#endif
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}


namespace {

// ------------------------------------------------------------------------------------------------
// signal a tokenization error; this is always unrecoverable. Throws DeadlyImportError.
void TokenizeError(const std::string& message, unsigned int offset)
{
    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}


// ------------------------------------------------------------------------------------------------
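// byte distance of 'cursor' from 'begin'; used for bounds checks and error reporting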
uint32_t Offset(const char* begin, const char* cursor)
{
    ai_assert(begin <= cursor);
    return static_cast<unsigned int>(cursor - begin);
}


// ------------------------------------------------------------------------------------------------
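// convenience overload: report a tokenization error at the current cursor position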
void TokenizeError(const std::string& message, const char* begin, const char* cursor)
{
    TokenizeError(message, Offset(begin, cursor));
}


// ------------------------------------------------------------------------------------------------
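// read an unsigned 32 bit word stored in little-endian byte order, convert it to native
// endianness (AI_SWAP4) and advance the cursor by four bytes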
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 4) {
        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
    }

    // copy the bytes instead of dereferencing a reinterpret_cast'ed pointer:
    // 'cursor' is not guaranteed to be suitably aligned for uint32_t
    uint32_t word;
    ::memcpy(&word, cursor, 4);
    AI_SWAP4(word);

    cursor += 4;

    return word;
}


// ------------------------------------------------------------------------------------------------
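// read a single byte and advance the cursor by one; errors out if no input is left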
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
    }

    uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
    ++cursor;

    return word;
}


// ------------------------------------------------------------------------------------------------
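// read a length-prefixed string. The length prefix is a single byte by default, or a 32 bit
// word if long_length is set. Unless allow_null is set, an embedded NUL character is treated
// as a tokenization error. Returns the string length in bytes; [sbegin_out, send_out) spans
// the string contents.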
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
    bool long_length = false,
    bool allow_null = false)
{
    const uint32_t len_len = long_length ? 4 : 1;
    if(Offset(cursor, end) < len_len) {
        TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
    }

    const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

    if (Offset(cursor, end) < length) {
        TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
    }

    sbegin_out = cursor;
    cursor += length;

    send_out = cursor;

    if(!allow_null) {
        for (unsigned int i = 0; i < length; ++i) {
            if(sbegin_out[i] == '\0') {
                TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
            }
        }
    }

    return length;
}


// ------------------------------------------------------------------------------------------------
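// skip over a single property record, deducing its size from the one-character type code that
// leads the record ('Y', 'C', 'I', 'F', 'D', 'L', 'R', 'S', 'b' or one of the array codes
// 'f', 'd', 'l', 'i'). The returned [sbegin_out, send_out) range includes the type code.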
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
        // 16 bit int
    case 'Y':
        cursor += 2;
        break;

        // 1 byte bool flag (yes/no)
    case 'C':
        cursor += 1;
        break;

        // 32 bit int
    case 'I':
        // <- fall thru

        // float
    case 'F':
        cursor += 4;
        break;

        // double
    case 'D':
        cursor += 8;
        break;

        // 64 bit int
    case 'L':
        cursor += 8;
        break;

        // note: do not write cursor += ReadWord(...cursor) as this would be UB

        // raw binary data
    case 'R':
    {
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

        // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i': {

        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride = 0;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            default:
                ai_assert(false);
            }
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

        // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }
    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}


// ------------------------------------------------------------------------------------------------
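// read a single scope record and emit tokens for it. As implemented below, each record is laid
// out as follows:
//
//   uint32  end_offset   - absolute offset at which this scope ends (0 marks the file footer)
//   uint32  prop_count   - number of properties in the scope
//   uint32  prop_length  - length of the property list in bytes
//   uint8   name_len     - length of the scope/key name, followed by the name itself
//   ...                  - prop_count property records (see ReadData)
//   ...                  - optional nested scopes, terminated by a 13 byte NUL sentinel
//
// returns false once the zero end_offset of the footer is encountered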
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
    // the first word contains the offset at which this block ends
    const uint32_t end_offset = ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint32_t prop_count = ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint32_t prop_length = ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate that the sub-scope
    // exists (i.e. to distinguish between P: and P : {}). This NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

    if (Offset(input, cursor) < end_offset) {

        if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
            ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += BLOCK_SENTINEL_LENGTH;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}


} // !anonymous namespace


// ------------------------------------------------------------------------------------------------
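// entry point for the binary tokenizer. Verifies the 0x1b (27) byte header, which starts with
// the "Kaydara FBX Binary" magic string, then reads top-level scopes until the zero word that
// marks the file footer is reached.
//
// minimal usage sketch (hypothetical caller, assuming the whole file has already been loaded
// into a contiguous buffer 'buf' of 'buf_size' bytes):
//
//   TokenList tokens;
//   TokenizeBinary(tokens, buf, buf_size);
//   // 'tokens' now holds KEY/DATA/COMMA/OPEN_BRACKET/CLOSE_BRACKET tokens for the FBX parser.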
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
{
    ai_assert(input);

    if(length < 0x1b) {
        TokenizeError("file is too short",0);
    }

    if (strncmp(input,"Kaydara FBX Binary",18)) {
        TokenizeError("magic bytes not found",0);
    }

    // skip the remainder of the 0x1b byte header and start reading scopes
    const char* cursor = input + 0x1b;

    while (cursor < input + length) {
        if(!ReadScope(output_tokens, input, cursor, input + length)) {
            break;
        }
    }
}

} // !FBX
} // !Assimp

#endif // !ASSIMP_BUILD_NO_FBX_IMPORTER